Exemplo n.º 1
0
 def test_book_info_with_sample(self):
     # A book with an excerpt must yield exactly one SAMPLE link that
     # points at the excerpt file.
     _, book_info = self.sample_json("has_sample.json")
     extracted = OverdriveRepresentationExtractor.book_info_to_metadata(
         book_info)
     sample_links = [
         link for link in extracted.links if link.rel == Hyperlink.SAMPLE
     ]
     # The unpacking fails unless there is exactly one sample link.
     [sample_link] = sample_links
     expected_href = "http://excerpts.contentreserve.com/FormatType-410/1071-1/9BD/24F/82/BridesofConvenienceBundle9781426803697.epub"
     eq_(expected_href, sample_link.href)
Exemplo n.º 2
0
    def process_identifier(self, identifier):
        """Correct the medium of the edition behind `identifier`'s license pool.

        The medium is resolved in this order:

        1. the first delivery mechanism of the license pool that reports
           an implicit medium;
        2. a metadata lookup against Overdrive or 3M, chosen by the
           identifier's type;
        3. `Edition.BOOK_MEDIUM` as the final fallback.

        The edition is only touched (and a message printed) when its
        current medium differs from the resolved one.

        :param identifier: an object exposing `licensed_through` (the
            license pool) and `type`.
        """
        lp = identifier.licensed_through

        # What is the correct medium?
        correct_medium = None
        for lpdm in lp.delivery_mechanisms:
            correct_medium = lpdm.delivery_mechanism.implicit_medium
            if correct_medium:
                break

        if not correct_medium and identifier.type == Identifier.OVERDRIVE_ID:
            content = self.overdrive.metadata_lookup(identifier)
            metadata = OverdriveRepresentationExtractor.book_info_to_metadata(
                content)
            # A lookup may yield no metadata at all; don't dereference it.
            if metadata:
                correct_medium = metadata.medium

        if not correct_medium and identifier.type == Identifier.THREEM_ID:
            metadata = self.threem.bibliographic_lookup(identifier)
            if metadata:
                correct_medium = metadata.medium

        # Apply the fallback *before* comparing, so an unresolved medium
        # is never reported as "actually None" and an edition that is
        # already a book is left alone. (This replaces a leftover
        # debugger breakpoint that halted unattended runs.)
        correct_medium = correct_medium or Edition.BOOK_MEDIUM

        if lp.edition.medium != correct_medium:
            # Single-argument parenthesized print behaves the same under
            # the Python 2 print statement and the Python 3 function.
            print("%s is actually %s, not %s" % (
                lp.edition.title, correct_medium, lp.edition.medium))
            lp.edition.medium = correct_medium
Exemplo n.º 3
0
 def test_availability_info(self):
     # Every entry extracted from the sample book list must carry an
     # availability link, an ID and a title key.
     _, book_list = self.sample_json("overdrive_book_list.json")
     entries = OverdriveRepresentationExtractor.availability_link_list(
         book_list)
     required_keys = ('availability_link', 'id', 'title')
     for entry in entries:
         assert all(required in entry for required in required_keys)
Exemplo n.º 4
0
    def test_availability_info_missing_data(self):
        # The fixture has one item with an ID but no title, and one item
        # with a title but no ID. Only the former produces a data
        # structure; the latter is dropped.
        _, book_list = self.sample_json(
            "overdrive_book_list_missing_data.json")
        entries = OverdriveRepresentationExtractor.availability_link_list(
            book_list)

        # The unpacking fails unless exactly one entry came back.
        [only_entry] = entries
        eq_('title is missing', only_entry['id'])
        eq_(None, only_entry['title'])
Exemplo n.º 5
0
    def test_book_info_with_awards(self):
        # A book with awards yields exactly one AWARDS measurement whose
        # value and weight are both 1.
        _, book_info = self.sample_json("has_awards.json")
        extracted = OverdriveRepresentationExtractor.book_info_to_metadata(
            book_info)

        award_measurements = []
        for measurement in extracted.measurements:
            if Measurement.AWARDS == measurement.quantity_measured:
                award_measurements.append(measurement)

        # The unpacking fails unless there is exactly one such measurement.
        [awards] = award_measurements
        eq_(1, awards.value)
        eq_(1, awards.weight)
Exemplo n.º 6
0
    def test_book_info_with_grade_levels(self):
        # Grade-level subjects in the sample are extracted as
        # "Grade 4" through "Grade 8".
        _, book_info = self.sample_json("has_grade_levels.json")
        extracted = OverdriveRepresentationExtractor.book_info_to_metadata(
            book_info)

        found_levels = [
            subject.identifier for subject in extracted.subjects
            if subject.type == Subject.GRADE_LEVEL
        ]
        found_levels.sort()

        expected_levels = [
            u'Grade 4', u'Grade 5', u'Grade 6', u'Grade 7', u'Grade 8'
        ]
        eq_(expected_levels, found_levels)
Exemplo n.º 7
0
    def test_book_info_with_circulationdata(self):
        # An Overdrive availability JSON block can be converted into a
        # CirculationData object.
        _, availability = self.sample_json(
            "overdrive_availability_information.json")
        circulation = OverdriveRepresentationExtractor.book_info_to_circulation(
            availability)

        # Related IDs: the primary identifier is the Overdrive ID.
        primary = circulation.primary_identifier(self._db)
        expected = (Identifier.OVERDRIVE_ID,
                    '2a005d55-a417-4053-b90d-7a38ca6d2065')
        eq_(expected, (primary.type, primary.identifier))
    def process_identifier(self, identifier):
        """Register each format found for `identifier` on its license pool.

        Looks the identifier up through `self.api`, converts the result
        to metadata, and calls `set_delivery_mechanism` on the
        identifier's license pool once per format, printing one line per
        format. Does nothing when the lookup yields no metadata.

        :param identifier: an object exposing `identifier` (the ID
            string) and `licensed_through` (the license pool).
        """
        content = self.api.metadata_lookup(identifier)
        metadata = OverdriveRepresentationExtractor.book_info_to_metadata(content)
        if not metadata:
            return

        license_pool = identifier.licensed_through
        # `fmt` rather than `format`, so the builtin is not shadowed.
        for fmt in metadata.formats:
            # Single-argument parenthesized print behaves the same under
            # the Python 2 print statement and the Python 3 function.
            print("%s: %s - %s" % (
                identifier.identifier, fmt.content_type, fmt.drm_scheme))
            # The return value was never used, so it is not bound.
            license_pool.set_delivery_mechanism(
                fmt.content_type,
                fmt.drm_scheme,
                fmt.link
            )
    def process_identifier(self, identifier):
        """Register each format found for `identifier` on its license pool.

        The identifier is looked up through `self.api` and the result is
        converted to metadata. For every format in that metadata one
        line is printed and `set_delivery_mechanism` is called on the
        identifier's license pool. Returns early, doing nothing, when
        the conversion yields no metadata.

        :param identifier: an object exposing `identifier` (the ID
            string) and `licensed_through` (the license pool).
        """
        content = self.api.metadata_lookup(identifier)
        metadata = OverdriveRepresentationExtractor.book_info_to_metadata(
            content)
        if not metadata:
            return

        license_pool = identifier.licensed_through
        # The loop variable is `fmt` so the builtin `format` stays usable.
        for fmt in metadata.formats:
            # A parenthesized single argument prints identically under
            # Python 2's print statement and Python 3's print function.
            print("%s: %s - %s" % (identifier.identifier, fmt.content_type,
                                   fmt.drm_scheme))
            # The result is discarded; nothing ever read it.
            license_pool.set_delivery_mechanism(fmt.content_type,
                                                fmt.drm_scheme,
                                                fmt.link)
    def process_identifier(self, identifier):
        """Correct the medium of the edition behind `identifier`'s license pool.

        Resolution order for the medium:

        1. the first delivery mechanism of the license pool that reports
           an implicit medium;
        2. a metadata lookup against Overdrive or 3M, selected by the
           identifier's type;
        3. `Edition.BOOK_MEDIUM` when nothing else resolved.

        A message is printed and the edition updated only when its
        current medium differs from the resolved one.

        :param identifier: an object exposing `licensed_through` (the
            license pool) and `type`.
        """
        lp = identifier.licensed_through

        # What is the correct medium?
        correct_medium = None
        for lpdm in lp.delivery_mechanisms:
            correct_medium = lpdm.delivery_mechanism.implicit_medium
            if correct_medium:
                break

        if not correct_medium and identifier.type == Identifier.OVERDRIVE_ID:
            content = self.overdrive.metadata_lookup(identifier)
            metadata = OverdriveRepresentationExtractor.book_info_to_metadata(content)
            # Guard against a lookup that produced no metadata.
            if metadata:
                correct_medium = metadata.medium

        if not correct_medium and identifier.type == Identifier.THREEM_ID:
            metadata = self.threem.bibliographic_lookup(identifier)
            if metadata:
                correct_medium = metadata.medium

        # Resolve the fallback up front instead of stopping in a
        # debugger: the comparison below then never reports
        # "actually None", and editions that are already books are left
        # untouched.
        correct_medium = correct_medium or Edition.BOOK_MEDIUM

        if lp.edition.medium != correct_medium:
            # Parenthesized single-argument print works as both the
            # Python 2 statement and the Python 3 function.
            print("%s is actually %s, not %s" % (lp.edition.title, correct_medium, lp.edition.medium))
            lp.edition.medium = correct_medium
Exemplo n.º 11
0
    def test_book_info_with_metadata(self):
        # An Overdrive JSON block can be converted into a Metadata
        # object: in full, bibliographic-only, or formats-only.

        _, book_info = self.sample_json("overdrive_metadata.json")
        extract = OverdriveRepresentationExtractor.book_info_to_metadata

        def check_formats(formats):
            # The sample advertises exactly two formats: Kindle and
            # Adobe-DRM PDF (sorted here by content type).
            [kindle, pdf] = sorted(formats,
                                   key=lambda fmt: fmt.content_type)
            eq_(DeliveryMechanism.KINDLE_CONTENT_TYPE, kindle.content_type)
            eq_(DeliveryMechanism.KINDLE_DRM, kindle.drm_scheme)

            eq_(Representation.PDF_MEDIA_TYPE, pdf.content_type)
            eq_(DeliveryMechanism.ADOBE_DRM, pdf.drm_scheme)

        full = extract(book_info)

        # Simple bibliographic fields.
        simple_fields = [
            ("Agile Documentation", full.title),
            ("Agile Documentation A Pattern Guide to Producing Lightweight Documents for Software Projects",
             full.sort_title),
            ("A Pattern Guide to Producing Lightweight Documents for Software Projects",
             full.subtitle),
            (Edition.BOOK_MEDIUM, full.medium),
            ("Wiley Software Patterns", full.series),
            ("eng", full.language),
            ("Wiley", full.publisher),
            ("John Wiley & Sons, Inc.", full.imprint),
        ]
        for expected, actual in simple_fields:
            eq_(expected, actual)

        published = full.published
        eq_(2005, published.year)
        eq_(1, published.month)
        eq_(31, published.day)

        # Exactly one contributor: the author.
        [author] = full.contributors
        eq_(u"Rüping, Andreas", author.sort_name)
        eq_("Andreas Rüping", author.display_name)
        eq_([Contributor.AUTHOR_ROLE], author.roles)

        # Subjects, ordered by identifier.
        ordered_subjects = sorted(full.subjects,
                                  key=lambda subject: subject.identifier)
        found_subjects = [
            (subject.identifier, subject.type, subject.weight)
            for subject in ordered_subjects
        ]
        eq_([
            ("Computer Technology", Subject.OVERDRIVE, 100),
            ("Nonfiction", Subject.OVERDRIVE, 100),
            ('Object Technologies - Miscellaneous', 'tag', 1),
        ], found_subjects)

        # Related IDs.
        primary = full.primary_identifier
        eq_((Identifier.OVERDRIVE_ID, '3896665d-9d81-4cac-bd43-ffc5066de1f5'),
            (primary.type, primary.identifier))

        # The original data contains a blank ASIN in addition to the
        # actual ASIN, but it doesn't show up here.
        found_ids = sorted(
            [(ident.type, ident.identifier) for ident in full.identifiers])
        eq_([
            (Identifier.ASIN, "B000VI88N2"),
            (Identifier.ISBN, "9780470856246"),
            (Identifier.OVERDRIVE_ID, '3896665d-9d81-4cac-bd43-ffc5066de1f5'),
        ], found_ids)

        # Available formats.
        check_formats(full.circulation.formats)

        # Links to various resources, ordered by rel.
        short_desc, cover, long_desc = sorted(full.links,
                                              key=lambda link: link.rel)

        eq_(Hyperlink.DESCRIPTION, long_desc.rel)
        assert long_desc.content.startswith("<p>Software documentation")

        eq_(Hyperlink.SHORT_DESCRIPTION, short_desc.rel)
        assert short_desc.content.startswith("<p>Software documentation")
        assert len(short_desc.content) < len(long_desc.content)

        eq_(Hyperlink.IMAGE, cover.rel)
        eq_(
            'http://images.contentreserve.com/ImageType-100/0128-1/%7B3896665D-9D81-4CAC-BD43-FFC5066DE1F5%7DImg100.jpg',
            cover.href)

        thumb = cover.thumbnail

        eq_(Hyperlink.THUMBNAIL_IMAGE, thumb.rel)
        eq_(
            'http://images.contentreserve.com/ImageType-200/0128-1/%7B3896665D-9D81-4CAC-BD43-FFC5066DE1F5%7DImg200.jpg',
            thumb.href)

        # Measurements associated with the book.
        measurements = full.measurements

        def first_measurement(quantity):
            # First measurement of the given kind; IndexError if absent.
            matching = [
                m for m in measurements if m.quantity_measured == quantity
            ]
            return matching[0]

        eq_(2, first_measurement(Measurement.POPULARITY).value)
        eq_(1, first_measurement(Measurement.RATING).value)

        # Request only the bibliographic information.
        bibliographic_only = extract(
            book_info, include_bibliographic=True, include_formats=False)

        eq_("Agile Documentation", bibliographic_only.title)
        eq_(None, bibliographic_only.circulation)

        # Request only the format information.
        formats_only = extract(
            book_info, include_bibliographic=False, include_formats=True)

        eq_(None, formats_only.title)
        check_formats(formats_only.circulation.formats)
Exemplo n.º 12
0
 def test_link(self):
     # The "first" link extracted from the sample book list must equal
     # the link-safe form of the expected collection URL.
     _, book_list = self.sample_json("overdrive_book_list.json")
     unsafe_url = "http://api.overdrive.com/v1/collections/collection-id/products?limit=300&offset=0&lastupdatetime=2014-04-28%2009:25:09&sort=popularity:desc&formats=ebook-epub-open,ebook-epub-adobe,ebook-pdf-adobe,ebook-pdf-open"
     expected_link = OverdriveAPI.make_link_safe(unsafe_url)
     first_link = OverdriveRepresentationExtractor.link(book_list, "first")
     eq_(expected_link, first_link)
from threem import ThreeMAPI
from core.opds_import import SimplifiedOPDSLookup

# One-off repair script: for every Overdrive edition whose author string
# is blank, re-fetch its metadata, replace its contributions, and
# recalculate its presentation.
lookup = SimplifiedOPDSLookup("http://metadata.alpha.librarysimplified.org/")

_db = production_session()
overdrive = OverdriveAPI(_db)
threem = ThreeMAPI(_db)

q = _db.query(Edition).join(Edition.data_source).filter(
    DataSource.name.in_([DataSource.OVERDRIVE])).filter(Edition.author == '')
# Parenthesized single-argument prints behave the same under the
# Python 2 print statement and the Python 3 print function.
print("Fixing %s books." % q.count())
for edition in q:
    # The query above only selects Overdrive editions, so the 3M branch
    # is only reached if the data-source filter is widened.
    if edition.data_source.name == DataSource.OVERDRIVE:
        data = overdrive.metadata_lookup(edition.primary_identifier)
        metadata = OverdriveRepresentationExtractor.book_info_to_metadata(data)
    else:
        metadata = threem.bibliographic_lookup(edition.primary_identifier)

    # A lookup may yield no metadata; skip the edition instead of
    # aborting the whole run with an AttributeError.
    if not metadata:
        print("No metadata for %r, skipping." % (edition.primary_identifier,))
        continue

    metadata.update_contributions(_db,
                                  edition,
                                  metadata_client=lookup,
                                  replace_contributions=True)
    if edition.work:
        edition.work.calculate_presentation()
    else:
        edition.calculate_presentation()

    for c in edition.contributions:
        print("%s = %s (%s)" % (c.role, c.contributor.display_name,
                                c.contributor.name))
    print("%s %s" % (edition.author, edition.sort_author))
    # Persist each repaired edition as it is processed; without this
    # the script's changes are never written to the database.
    _db.commit()
    OverdriveRepresentationExtractor
)
from threem import ThreeMAPI
from core.opds_import import SimplifiedOPDSLookup
# One-off repair script: re-fetch metadata for every Overdrive edition
# with a blank author string, replace its contributions, recalculate its
# presentation, and commit after each edition.
lookup = SimplifiedOPDSLookup("http://metadata.alpha.librarysimplified.org/")

_db = production_session()
overdrive = OverdriveAPI(_db)
threem = ThreeMAPI(_db)

q = _db.query(Edition).join(Edition.data_source).filter(DataSource.name.in_([DataSource.OVERDRIVE])).filter(Edition.author=='')
# Single-argument parenthesized prints are valid as both the Python 2
# print statement and the Python 3 print function.
print("Fixing %s books." % q.count())
for edition in q:
    # Only Overdrive editions are selected above; the 3M branch matters
    # only if the data-source filter is widened.
    if edition.data_source.name==DataSource.OVERDRIVE:
        data = overdrive.metadata_lookup(edition.primary_identifier)
        metadata = OverdriveRepresentationExtractor.book_info_to_metadata(data)
    else:
        metadata = threem.bibliographic_lookup(edition.primary_identifier)

    # Skip editions whose lookup produced no metadata rather than
    # crashing mid-run on `None.update_contributions`.
    if not metadata:
        print("No metadata for %r, skipping." % (edition.primary_identifier,))
        continue

    metadata.update_contributions(_db, edition, metadata_client=lookup,
                                  replace_contributions=True)
    if edition.work:
        edition.work.calculate_presentation()
    else:
        edition.calculate_presentation()

    for c in edition.contributions:
        print("%s = %s (%s)" % (
            c.role, c.contributor.display_name, c.contributor.name
        ))
    print("%s %s" % (edition.author, edition.sort_author))
    # Commit per edition so progress survives a later failure.
    _db.commit()