Example #1
    def __init__(self, _db, overdrive=None, threem=None, axis=None):
        self._db = _db
        self.overdrive = overdrive
        self.threem = threem
        self.axis = axis
        self.apis = [x for x in (overdrive, threem, axis) if x]
        self.log = logging.getLogger("Circulation API")

        # When we get our view of a patron's loans and holds, we need
        # to include loans from all licensed data sources.  We do not
        # need to include loans from open-access sources because we
        # are the authorities on those.
        data_sources_for_sync = []
        if self.overdrive:
            data_sources_for_sync.append(
                DataSource.lookup(_db, DataSource.OVERDRIVE)
            )
        if self.threem:
            data_sources_for_sync.append(
                DataSource.lookup(_db, DataSource.THREEM)
            )
        if self.axis:
            data_sources_for_sync.append(
                DataSource.lookup(_db, DataSource.AXIS_360)
            )

        self.identifier_type_to_data_source_name = dict(
            (ds.primary_identifier_type, ds.name) 
            for ds in data_sources_for_sync)
        self.data_source_ids_for_sync = [
            x.id for x in data_sources_for_sync
        ]
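For context, here's a minimal sketch of how this constructor might be exercised. The MockOverdriveAPI name and its constructor signature are assumptions for illustration only.

    # Hypothetical mock; any object standing in for an Overdrive API works.
    overdrive = MockOverdriveAPI(_db)
    circulation = CirculationAPI(_db, overdrive=overdrive)
    # Only the configured API is registered, and only its data source
    # participates in loan/hold syncing.
    assert circulation.apis == [overdrive]
    assert len(circulation.data_source_ids_for_sync) == 1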
Example #2
    def test_items_that_need_coverage(self):
        source = DataSource.lookup(self._db, DataSource.METADATA_WRANGLER)
        other_source = DataSource.lookup(self._db, DataSource.OVERDRIVE)
        
        # An item that hasn't been covered by the provider yet
        cr = self._coverage_record(self._edition(), other_source)
        
        # An item that has been covered by the reaper operation already
        reaper_cr = self._coverage_record(
            self._edition(), source, operation=CoverageRecord.REAP_OPERATION
        )
        
        # An item that has been covered by the reaper operation, but has
        # had its license repurchased.
        relicensed_edition, relicensed_licensepool = self._edition(with_license_pool=True)
        relicensed_coverage_record = self._coverage_record(
            relicensed_edition, source, operation=CoverageRecord.REAP_OPERATION
        )
        relicensed_licensepool.update_availability(1, 0, 0, 0)

        items = self.provider.items_that_need_coverage().all()
        # Provider ignores anything that has been reaped and doesn't have
        # licenses.
        assert reaper_cr.identifier not in items
        # But it picks up anything that hasn't been covered at all and anything
        # that's been licensed anew even if it's already been reaped.
        eq_(2, len(items))
        assert relicensed_licensepool.identifier in items
        assert cr.identifier in items
        # The Wrangler Reaper coverage record is removed from the db
        # when it's committed.
        assert relicensed_coverage_record in relicensed_licensepool.identifier.coverage_records
        self._db.commit()
        assert relicensed_coverage_record not in relicensed_licensepool.identifier.coverage_records
Example #3
    def test_items_that_need_coverage(self):
        source = DataSource.lookup(self._db, DataSource.METADATA_WRANGLER)
        other_source = DataSource.lookup(self._db, DataSource.OVERDRIVE)
        # An item that hasn't been covered by the provider yet
        cr = self._coverage_record(self._edition(), other_source)
        # An item that has been covered by the reaper operation already
        reaper_cr = self._coverage_record(
            self._edition(), source, operation=CoverageRecord.REAP_OPERATION
        )
        # An item that has been covered by the reaper operation, but has
        # had its license repurchased.
        relicensed, relicensed_lp = self._edition(with_license_pool=True)
        self._coverage_record(
            relicensed, source, operation=CoverageRecord.REAP_OPERATION
        )
        relicensed_lp.update_availability(1, 0, 0, 0)

        with temp_config() as config:
            config[Configuration.INTEGRATIONS][Configuration.METADATA_WRANGLER_INTEGRATION] = {
                Configuration.URL : "http://url.gov"
            }
            provider = MetadataWranglerCoverageProvider(self._db)
        items = provider.items_that_need_coverage().all()
        # Provider ignores anything that has been reaped and doesn't have
        # licenses.
        assert reaper_cr.identifier not in items
        # But it picks up anything that hasn't been covered at all and anything
        # that's been licensed anew even if it's already been reaped.
        eq_(2, len(items))
        assert relicensed_lp.identifier in items
        assert cr.identifier in items
        # The Wrangler Reaper coverage record is removed from the db
        # when it's committed.
        self._db.commit()
        eq_([], relicensed_lp.identifier.coverage_records)
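The temp_config block above scopes configuration changes to a single test. Here is a minimal sketch of the same idea built on the standard library's contextmanager; it is illustrative, not the codebase's actual temp_config implementation.

    import copy
    from contextlib import contextmanager

    CONFIG = {'integrations': {}}

    @contextmanager
    def temp_config():
        saved = copy.deepcopy(CONFIG)   # snapshot, including nested dicts
        try:
            yield CONFIG
        finally:
            CONFIG.clear()
            CONFIG.update(saved)        # restore on exit, even on error

    with temp_config() as config:
        config['integrations']['Metadata Wrangler'] = {'url': 'http://url.gov'}
    # Outside the block, CONFIG is back to its original state.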
Example #4
    def test_run_once(self):
        # Setup authentication and Metadata Wrangler details.
        lp = self._licensepool(
            None, data_source_name=DataSource.BIBLIOTHECA,
            collection=self.collection
        )
        lp.identifier.type = Identifier.BIBLIOTHECA_ID
        isbn = Identifier.parse_urn(self._db, u'urn:isbn:9781594632556')[0]
        lp.identifier.equivalent_to(
            DataSource.lookup(self._db, DataSource.BIBLIOTHECA), isbn, 1
        )
        eq_([], lp.identifier.links)
        eq_([], lp.identifier.measurements)

        # Queue some data to be found.
        responses = (
            'metadata_updates_response.opds',
            'metadata_updates_empty_response.opds',
        )
        for filename in responses:
            data = sample_data(filename, 'opds')
            self.lookup.queue_response(
                200, {'content-type' : OPDSFeed.ACQUISITION_FEED_TYPE}, data
            )

        timestamp = self.ts
        new_timestamp = self.monitor.run_once(timestamp)

        # We have a new value to use for the Monitor's timestamp -- the
        # earliest date seen in the last OPDS feed that contained
        # any entries.
        eq_(datetime.datetime(2016, 9, 20, 19, 37, 2), new_timestamp.finish)
        eq_("Editions processed: 1", new_timestamp.achievements)

        # Normally run_once() doesn't update the monitor's timestamp,
        # but this implementation does, so that work isn't redone if
        # run_once() crashes or the monitor is killed.
        eq_(new_timestamp.finish, self.monitor.timestamp().finish)

        # The original Identifier has information from the
        # mock Metadata Wrangler.
        mw_source = DataSource.lookup(self._db, DataSource.METADATA_WRANGLER)
        eq_(3, len(lp.identifier.links))
        [quality] = lp.identifier.measurements
        eq_(mw_source, quality.data_source)

        # Check the URLs we processed.
        url1, url2 = [x[0] for x in self.lookup.requests]

        # The first URL processed was the default one for the
        # MetadataWranglerOPDSLookup.
        eq_(self.lookup.get_collection_url(self.lookup.UPDATES_ENDPOINT), url1)

        # The second URL processed was whatever we saw in the 'next' link.
        eq_("http://next-link/", url2)
Example #5
    def test_finalize_edition(self):

        provider_no_presentation_ready = self._provider(presentation_ready_on_success=False)
        provider_presentation_ready = self._provider(presentation_ready_on_success=True)
        identifier = self._identifier()
        source = DataSource.lookup(self._db, DataSource.GUTENBERG)

        # Here's an Edition with no LicensePool.
        edition, is_new = Edition.for_foreign_id(
            self._db, source, identifier.type, identifier.identifier
        )
        edition.title = self._str

        # This will effectively do nothing.
        provider_no_presentation_ready.finalize_edition(edition)

        # No Works have been created.
        eq_(0, self._db.query(Work).count())

        # But if there's also a LicensePool...
        pool, is_new = LicensePool.for_foreign_id(
            self._db, source, identifier.type, identifier.identifier
        )

        # finalize_edition() will create a Work.
        provider_no_presentation_ready.finalize_edition(edition)

        work = pool.work
        eq_(work, edition.work)
        eq_(False, work.presentation_ready)

        # If the provider is configured to do so, finalize_edition()
        # will also set the Work as presentation-ready.
        provider_presentation_ready.finalize_edition(edition)
        eq_(True, work.presentation_ready)
Example #6
    def test_load_cover_link(self):
        # Create a directory import script with an empty mock filesystem.
        script = MockDirectoryImportScript(self._db, {})

        identifier = self._identifier(Identifier.GUTENBERG_ID, "2345")
        gutenberg = DataSource.lookup(self._db, DataSource.GUTENBERG)
        mirror = MockS3Uploader()
        args = (identifier, gutenberg, "covers", mirror)

        # There is nothing on the mock filesystem, so in this case
        # load_cover_link returns None.
        eq_(None, script.load_cover_link(*args))

        # But we tried.
        eq_(
            ('2345', 'covers', Representation.COMMON_IMAGE_EXTENSIONS,
             'cover image'),
            script._locate_file_args
        )

        # Try another script that has a populated mock filesystem.
        mock_filesystem = {
            'covers' : (
                'acover.jpeg', Representation.JPEG_MEDIA_TYPE, "I'm an image."
            )
        }
        script = MockDirectoryImportScript(self._db, mock_filesystem)
        link = script.load_cover_link(*args)
        eq_(Hyperlink.IMAGE, link.rel)
        assert link.href.endswith(
            '/test.cover.bucket/Gutenberg/Gutenberg+ID/2345/2345.jpg'
        )
        eq_(Representation.JPEG_MEDIA_TYPE, link.media_type)
        eq_("I'm an image.", link.content)
Example #7
 def __init__(self, _db, data_directory):
     self._db = _db
     self.collection = Collection.by_protocol(self._db, ExternalIntegration.GUTENBERG).one()
     self.source = DataSource.lookup(self._db, DataSource.GUTENBERG)
     self.data_directory = data_directory
     self.catalog_path = os.path.join(self.data_directory, self.FILENAME)
     self.log = logging.getLogger("Gutenberg API")
Example #8
    def lookup_equivalent_isbns(self, identifier):
        """Finds NoveList data for all ISBNs equivalent to an identifier.

        :return: Metadata object or None
        """
        lookup_metadata = []
        license_sources = DataSource.license_sources_for(self._db, identifier)
        # Look up strong ISBN equivalents.
        for license_source in license_sources:
            lookup_metadata += [self.lookup(eq.output)
                for eq in identifier.equivalencies
                if (eq.data_source==license_source and eq.strength==1
                    and eq.output.type==Identifier.ISBN)]

        if not lookup_metadata:
            self.log.error(
                "Identifiers without an ISBN equivalent can't "
                "be looked up with NoveList: %r", identifier
            )
            return None

        # Remove None values.
        lookup_metadata = [metadata for metadata in lookup_metadata if metadata]
        if not lookup_metadata:
            return None

        best_metadata, confidence = self.choose_best_metadata(
            lookup_metadata, identifier
        )
        if not best_metadata or round(confidence, 2) < 0.5:
            self.log.warn(self.NO_ISBN_EQUIVALENCY, identifier)
            return None
        return best_metadata
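A hedged usage sketch of the method above: `api` stands in for a configured NoveListAPI and `identifier` for an Identifier with a strength-1 ISBN equivalency (both names assumed).

    metadata = api.lookup_equivalent_isbns(identifier)
    if metadata is None:
        # Either no ISBN equivalent exists, no lookup returned data, or the
        # best match fell below the 0.5 confidence threshold.
        pass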
Example #9
    def __init__(self, _db, lookup=None, input_identifier_types=None, 
                 operation=None, **kwargs):
        if not input_identifier_types:
            input_identifier_types = [
                Identifier.OVERDRIVE_ID, 
                Identifier.THREEM_ID,
                Identifier.GUTENBERG_ID, 
                Identifier.AXIS_360_ID,
            ]
        output_source = DataSource.lookup(
            _db, DataSource.METADATA_WRANGLER
        )
        super(MetadataWranglerCoverageProvider, self).__init__(
            lookup = lookup or SimplifiedOPDSLookup.from_config(),
            service_name=self.SERVICE_NAME,
            input_identifier_types=input_identifier_types,
            output_source=output_source,
            operation=operation or self.OPERATION,
            **kwargs
        )

        if not self.lookup.authenticated:
            self.log.warn(
                "Authentication for the Library Simplified Metadata Wrangler "
                "is not set up. You can still use the metadata wrangler, but "
                "it will not know which collection you're asking about."
            )
Example #10
    def metadata_needed_for(self, collection_details):
        """Returns identifiers in the collection that could benefit from
        distributor metadata on the circulation manager.
        """
        client = authenticated_client_from_request(self._db)
        if isinstance(client, ProblemDetail):
            return client

        collection = collection_from_details(
            self._db, client, collection_details
        )

        resolver = IdentifierResolutionCoverageProvider
        unresolved_identifiers = collection.unresolved_catalog(
            self._db, resolver.DATA_SOURCE_NAME, resolver.OPERATION
        )

        # Omit identifiers that currently have metadata pending for
        # the IntegrationClientCoverImageCoverageProvider.
        data_source = DataSource.lookup(
            self._db, collection.name, autocreate=True
        )
        is_awaiting_metadata = self._db.query(
            CoverageRecord.id, CoverageRecord.identifier_id
        ).filter(
            CoverageRecord.data_source_id==data_source.id,
            CoverageRecord.status==CoverageRecord.REGISTERED,
            CoverageRecord.operation==IntegrationClientCoverImageCoverageProvider.OPERATION,
        ).subquery()

        unresolved_identifiers = unresolved_identifiers.outerjoin(
            is_awaiting_metadata,
            Identifier.id==is_awaiting_metadata.c.identifier_id
        ).filter(is_awaiting_metadata.c.id==None)

        # Add a message for each unresolved identifier
        pagination = load_pagination_from_request(default_size=25)
        feed_identifiers = pagination.apply(unresolved_identifiers).all()
        messages = list()
        for identifier in feed_identifiers:
            messages.append(OPDSMessage(
                identifier.urn, HTTP_ACCEPTED, "Metadata needed."
            ))

        title = "%s Metadata Requests for %s" % (collection.protocol, client.url)
        metadata_request_url = self.collection_feed_url(
            'metadata_needed_for', collection
        )

        request_feed = AcquisitionFeed(
            self._db, title, metadata_request_url, [], VerboseAnnotator,
            precomposed_entries=messages
        )

        self.add_pagination_links_to_feed(
            pagination, unresolved_identifiers, request_feed,
            'metadata_needed_for', collection
        )

        return feed_response(request_feed)
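The outerjoin plus `id == None` filter above is SQLAlchemy's anti-join idiom: keep only the rows that have no match in the subquery. A self-contained sketch of the same pattern on made-up tables (all names here are illustrative):

    from sqlalchemy import create_engine, Column, Integer, ForeignKey
    from sqlalchemy.ext.declarative import declarative_base
    from sqlalchemy.orm import sessionmaker

    Base = declarative_base()

    class Item(Base):
        __tablename__ = 'item'
        id = Column(Integer, primary_key=True)

    class Record(Base):
        __tablename__ = 'record'
        id = Column(Integer, primary_key=True)
        item_id = Column(Integer, ForeignKey('item.id'))

    engine = create_engine('sqlite://')
    Base.metadata.create_all(engine)
    session = sessionmaker(bind=engine)()

    covered = session.query(Record.id, Record.item_id).subquery()
    # LEFT OUTER JOIN, then keep only Items where no Record matched.
    uncovered = session.query(Item).outerjoin(
        covered, Item.id == covered.c.item_id
    ).filter(covered.c.id == None)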
Example #11
    def test_handle_import_messages(self):
        data_source = DataSource.lookup(self._db, DataSource.OVERDRIVE)
        provider = OPDSImportCoverageProvider("name", [], data_source)

        message = StatusMessage(201, "try again later")
        message2 = StatusMessage(404, "we're doomed")
        message3 = StatusMessage(200, "everything's fine")

        identifier = self._identifier()
        identifier2 = self._identifier()
        identifier3 = self._identifier()

        messages_by_id = { identifier.urn: message,
                           identifier2.urn: message2,
                           identifier3.urn: message3,
        }

        [f1, f2] = sorted(list(provider.handle_import_messages(messages_by_id)),
                          key=lambda x: x.exception)
        eq_(identifier, f1.obj)
        eq_("201: try again later", f1.exception)
        eq_(True, f1.transient)

        eq_(identifier2, f2.obj)
        eq_("404: we're doomed", f2.exception)
        eq_(False, f2.transient)
Example #12
 def _provider(self, presentation_ready_on_success=True):
     """Create a generic MockOPDSImportCoverageProvider for testing purposes."""
     source = DataSource.lookup(self._db, DataSource.OA_CONTENT_SERVER)
     return MockOPDSImportCoverageProvider(
         "mock provider", [], source,
         presentation_ready_on_success=presentation_ready_on_success
     )
Example #13
    def test_items_that_need_coverage_respects_cutoff(self):
        """Verify that this coverage provider respects the cutoff_time
        argument.
        """

        source = DataSource.lookup(self._db, DataSource.METADATA_WRANGLER)
        edition = self._edition()
        cr = self._coverage_record(edition, source, operation='sync')

        # We have a coverage record already, so this book doesn't show
        # up in items_that_need_coverage
        items = self.provider.items_that_need_coverage().all()
        eq_([], items)

        # But if we send a cutoff_time that's later than the time
        # associated with the coverage record...
        one_hour_from_now = (
            datetime.datetime.utcnow() + datetime.timedelta(seconds=3600)
        )
        provider_with_cutoff = self.create_provider(
            cutoff_time=one_hour_from_now
        )

        # The book starts showing up in items_that_need_coverage.
        eq_([edition.primary_identifier], 
            provider_with_cutoff.items_that_need_coverage().all())
Example #14
    def __init__(self, _db, input_identifier_types=None, metadata_lookup=None,
                 cutoff_time=None, operation=None):
        self._db = _db
        if not input_identifier_types:
            input_identifier_types = [
                Identifier.OVERDRIVE_ID, 
                Identifier.THREEM_ID,
                Identifier.GUTENBERG_ID, 
                Identifier.AXIS_360_ID,
            ]
        self.output_source = DataSource.lookup(
            self._db, DataSource.METADATA_WRANGLER
        )

        if not metadata_lookup:
            metadata_lookup = SimplifiedOPDSLookup.from_config()
        self.lookup = metadata_lookup

        if not operation:
            operation = CoverageRecord.SYNC_OPERATION
        self.operation = operation

        super(MetadataWranglerCoverageProvider, self).__init__(
            self.service_name,
            input_identifier_types,
            self.output_source,
            workset_size=20,
            cutoff_time=cutoff_time,
            operation=self.operation,
        )
Example #15
    def generate_mock_api(self):
        """Prep an empty NoveList result."""
        source = DataSource.lookup(self._db, DataSource.OVERDRIVE)
        metadata = Metadata(source)

        mock_api = MockNoveListAPI(self._db)
        mock_api.setup(metadata)
        return mock_api
Example #16
 def __init__(self, _db, testing=False, api=None):
     super(ThreeMCirculationSweep, self).__init__(
         _db, "3M Circulation Sweep", batch_size=25)
     self._db = _db
     if not api:
         api = ThreeMAPI(self._db, testing=testing)
     self.api = api
     self.data_source = DataSource.lookup(self._db, DataSource.THREEM)
Example #17
 def setup(self):
     super(TestMetadataWranglerCollectionReaper, self).setup()
     self.source = DataSource.lookup(self._db, DataSource.METADATA_WRANGLER)
     with temp_config() as config:
         config[Configuration.INTEGRATIONS][Configuration.METADATA_WRANGLER_INTEGRATION] = {
             Configuration.URL : "http://url.gov"
         }
         self.reaper = MetadataWranglerCollectionReaper(self._db)
Example #18
 def __init__(self, _db, authenticator, node_value,
              temporary_token_duration=None):
     self._db = _db
     self.authenticator = authenticator
     self.data_source = DataSource.lookup(_db, DataSource.ADOBE)
     self.temporary_token_duration = (
         temporary_token_duration or datetime.timedelta(minutes=10))
     if isinstance(node_value, basestring):
         node_value = int(node_value, 16)
     self.node_value = node_value
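The int(node_value, 16) call above parses a node value supplied as hexadecimal text; a one-line illustration:

    assert int("2a", 16) == 42   # hex string in, integer node value out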
Example #19
    def __init__(self, db, mirrors, uploader=None):
        self._db = db
        self.data_source_ids = []
        self.uploader = uploader or S3Uploader()
        self.log = logging.getLogger("Cover Image Scaler")

        for mirror in mirrors:
            data_source_name = mirror.DATA_SOURCE
            data_source = DataSource.lookup(self._db, data_source_name)
            self.data_source_ids.append(data_source.id)
Example #20
    def setup(self):
        super(TestFeedbooksOPDSImporter, self).setup()
        self.http = DummyHTTPClient()
        self.metadata = DummyMetadataClient()
        self.mirror = MockS3Uploader()

        self.data_source = DataSource.lookup(self._db, DataSource.FEEDBOOKS)

        # Create a default importer that's good enough for most tests.
        self.collection, self.importer = self._importer()
Example #21
    def test_lookup_equivalent_isbns(self):
        identifier = self._identifier(identifier_type=Identifier.OVERDRIVE_ID)
        api = MockNoveListAPI.from_config(self._default_library)

        # If there are no ISBN equivalents, it returns None.
        eq_(None, api.lookup_equivalent_isbns(identifier))

        source = DataSource.lookup(self._db, DataSource.OVERDRIVE)
        identifier.equivalent_to(source, self._identifier(), strength=1)
        self._db.commit()
        eq_(None, api.lookup_equivalent_isbns(identifier))

        # If there's an ISBN equivalent, but it doesn't result in metadata,
        # it returns None.
        isbn = self._identifier(identifier_type=Identifier.ISBN)
        identifier.equivalent_to(source, isbn, strength=1)
        self._db.commit()
        api.responses.append(None)
        eq_(None, api.lookup_equivalent_isbns(identifier))

        # Create an API class that can mock out NoveListAPI.choose_best_metadata
        class MockBestMetadataAPI(MockNoveListAPI):
            choose_best_metadata_return = None
            def choose_best_metadata(self, *args, **kwargs):
                return self.choose_best_metadata_return
        api = MockBestMetadataAPI.from_config(self._default_library)

        # Give the identifier another ISBN equivalent.
        isbn2 = self._identifier(identifier_type=Identifier.ISBN)
        identifier.equivalent_to(source, isbn2, strength=1)
        self._db.commit()

        # Queue metadata responses for each ISBN lookup.
        metadatas = [object(), object()]
        api.responses.extend(metadatas)

        # If choose_best_metadata returns None, the lookup returns None.
        api.choose_best_metadata_return = (None, None)
        eq_(None, api.lookup_equivalent_isbns(identifier))

        # Lookup was performed for both ISBNs.
        eq_([], api.responses)

        # If choose_best_metadata returns a low confidence metadata, the
        # lookup returns None.
        api.responses.extend(metadatas)
        api.choose_best_metadata_return = (metadatas[0], 0.33)
        eq_(None, api.lookup_equivalent_isbns(identifier))

        # If choose_best_metadata returns a high confidence metadata, the
        # lookup returns the metadata.
        api.responses.extend(metadatas)
        api.choose_best_metadata_return = (metadatas[1], 0.67)
        eq_(metadatas[1], api.lookup_equivalent_isbns(identifier))
Example #22
    def test_feed_includes_staff_rating(self):
        work = self._work(with_open_access_download=True)
        lp = work.license_pools[0]
        staff_data_source = DataSource.lookup(self._db, DataSource.LIBRARY_STAFF)
        lp.identifier.add_measurement(staff_data_source, Measurement.RATING, 3, weight=1000)

        feed = AcquisitionFeed(self._db, "test", "url", [work], AdminAnnotator(None, self._default_library, test_mode=True))
        [entry] = feedparser.parse(unicode(feed))['entries']
        rating = entry['schema_rating']
        eq_(3, float(rating['schema:ratingvalue']))
        eq_(Measurement.RATING, rating['additionaltype'])
Example #23
    def __init__(self, _db, api=None, **kwargs):
        input_identifier_types = [
            Identifier.GUTENBERG_ID, Identifier.URI
        ]
        output_source = DataSource.lookup(_db, DataSource.OCLC)
        super(OCLCClassifyCoverageProvider, self).__init__(
            "OCLC Classify Coverage Provider", input_identifier_types,
            output_source)

        self._db = _db
        self.api = api or OCLCClassifyAPI(self._db)
Example #24
 def setup(self):
     super(TestVendorIDModel, self).setup()
     self.authenticator = DummyMilleniumPatronAPI()
     self.model = AdobeVendorIDModel(self._db, self.authenticator,
                                     self.TEST_NODE_VALUE)
     self.data_source = DataSource.lookup(self._db, DataSource.ADOBE)
     # Normally this test patron doesn't have an authorization identifier.
     # Let's make sure there is one so it'll show up as the label.
     self.bob_patron = self.authenticator.authenticated_patron(
         self._db, dict(username="******", password="******"))
     self.bob_patron.authorization_identifier = "5"
Example #25
    def test_facets(self):
        # Normally we yield one FeaturedFacets object for each of the
        # library's enabled entry points.
        library = self._default_library
        script = CacheOPDSGroupFeedPerLane(
            self._db, manager=object(), cmd_args=[]
        )
        setting = library.setting(EntryPoint.ENABLED_SETTING)
        setting.value = json.dumps(
            [AudiobooksEntryPoint.INTERNAL_NAME,
             EbooksEntryPoint.INTERNAL_NAME]
        )

        lane = self._lane()
        audio_facets, ebook_facets = script.facets(lane)
        eq_(AudiobooksEntryPoint, audio_facets.entrypoint)
        eq_(EbooksEntryPoint, ebook_facets.entrypoint)

        # The first entry point in the library's list of enabled entry
        # points is treated as the default.
        eq_(True, audio_facets.entrypoint_is_default)
        eq_(audio_facets.entrypoint, list(library.entrypoints)[0])
        eq_(False, ebook_facets.entrypoint_is_default)

        for facets in (audio_facets, ebook_facets):
            # The FeaturedFacets object knows to feature works at the
            # library's minimum quality level.
            eq_(library.minimum_featured_quality,
                facets.minimum_featured_quality)
            # The FeaturedFacets object knows that custom lists are
            # not in play.
            eq_(False, facets.uses_customlists)

        # The first entry point is treated as the default only for WorkLists
        # that have no parent. When the WorkList has a parent, the selected
        # entry point is treated as an explicit choice -- navigating downward
        # in the lane hierarchy ratifies the default value.
        sublane = self._lane(parent=lane)
        f1, f2 = script.facets(sublane)
        for f in f1, f2:
            eq_(False, f.entrypoint_is_default)

        # Make it look like the lane uses custom lists.
        lane.list_datasource = DataSource.lookup(self._db, DataSource.OVERDRIVE)

        # If the library has no enabled entry points, we yield one
        # FeaturedFacets object with no particular entry point.
        setting.value = json.dumps([])
        no_entry_point, = script.facets(lane)
        eq_(None, no_entry_point.entrypoint)

        # The FeaturedFacets object knows that custom lists are in
        # play.
        eq_(True, no_entry_point.uses_customlists)
Example #26
 def process_item(self, identifier):
     data_source = DataSource.lookup(
         self._db, self.importer.data_source_name
     )
     try:
         response = self.content_server.lookup([identifier])
     except BadResponseException as e:
         return CoverageFailure(
             identifier,
             e.message,
             data_source
         )
Example #27
    def test_new_isbns(self):
        existing_id = self._identifier()
        metadata = Metadata(
            DataSource.lookup(self._db, DataSource.GUTENBERG),
            identifiers=[
                IdentifierData(type=Identifier.OCLC_WORK, identifier="abra"),
                IdentifierData(type=existing_id.type, identifier=existing_id.identifier),
                IdentifierData(type=Identifier.ISBN, identifier="kadabra"),
            ]
        )

        eq_(2, self.provider.new_isbns(metadata))
Example #28
 def setup(self):
     super(TestMetadataUploadCoverageProvider, self).setup()
     self.integration = self._external_integration(
         ExternalIntegration.METADATA_WRANGLER,
         goal=ExternalIntegration.METADATA_GOAL, url=self._url,
         username=u'abc', password=u'def'
     )
     self.source = DataSource.lookup(self._db, DataSource.METADATA_WRANGLER)
     self.collection = self._collection(
         protocol=ExternalIntegration.BIBLIOTHECA, external_account_id=u'lib'
     )
     self.provider = self.create_provider()
Example #29
    def test_related_books(self):
        # A book with no related books returns a ProblemDetail.
        with temp_config() as config:
            config['integrations'][Configuration.NOVELIST_INTEGRATION] = {}
            with self.app.test_request_context('/'):
                response = self.manager.work_controller.related(
                    self.datasource, self.identifier.type, self.identifier.identifier
                )
        eq_(404, response.status_code)
        eq_("http://librarysimplified.org/terms/problem/unknown-lane", response.uri)

        # Prep the book with another book in its series and a recommendation.
        self.lp.presentation_edition.series = "Around the World"
        self.french_1.presentation_edition.series = "Around the World"
        SessionManager.refresh_materialized_views(self._db)

        source = DataSource.lookup(self._db, self.datasource)
        metadata = Metadata(source)
        mock_api = MockNoveListAPI()
        metadata.recommendations = [self.english_2.license_pools[0].identifier]
        mock_api.setup(metadata)

        # A grouped feed is returned with both of these related books
        with self.app.test_request_context('/'):
            response = self.manager.work_controller.related(
                self.datasource, self.identifier.type, self.identifier.identifier,
                novelist_api=mock_api
            )
        eq_(200, response.status_code)
        feed = feedparser.parse(response.data)
        eq_(3, len(feed['entries']))

        # One book is in the recommendations feed.
        [e1] = [e for e in feed['entries'] if e['title'] == self.english_2.title]
        [collection_link] = [link for link in e1['links'] if link['rel']=='collection']
        eq_("Recommended Books", collection_link['title'])
        work_url = "/works/%s/%s/%s/" % (self.datasource, self.identifier.type, self.identifier.identifier)
        expected = urllib.quote(work_url + 'recommendations')
        eq_(True, collection_link['href'].endswith(expected))

        # Two books are in the series feed: the original work and its companion.
        [e2] = [e for e in feed['entries'] if e['title'] == self.french_1.title]
        [collection_link] = [link for link in e2['links'] if link['rel']=='collection']
        eq_("Around the World", collection_link['title'])
        expected = urllib.quote(work_url + 'series')
        eq_(True, collection_link['href'].endswith(expected))

        [e3] = [e for e in feed['entries'] if e['title'] == self.english_1.title]
        [collection_link] = [link for link in e3['links'] if link['rel']=='collection']
        eq_("Around the World", collection_link['title'])
        expected = urllib.quote(work_url + 'series')
        eq_(True, collection_link['href'].endswith(expected))
Example #30
    def test_load_circulation_data(self):
        # Create a directory import script with an empty mock filesystem.
        script = MockDirectoryImportScript(self._db, {})

        identifier = self._identifier(Identifier.GUTENBERG_ID, "2345")
        gutenberg = DataSource.lookup(self._db, DataSource.GUTENBERG)
        mirror = MockS3Uploader()
        args = (identifier, gutenberg, "ebooks", mirror, "Name of book",
                "rights URI")

        # There is nothing on the mock filesystem, so in this case
        # load_circulation_data returns None.
        eq_(None, script.load_circulation_data(*args))

        # But we tried.
        eq_(
            ('2345', 'ebooks', Representation.COMMON_EBOOK_EXTENSIONS,
             'ebook file'),
            script._locate_file_args
        )

        # Try another script that has a populated mock filesystem.
        mock_filesystem = {
            'ebooks' : (
                'book.epub', Representation.EPUB_MEDIA_TYPE, "I'm an EPUB."
            )
        }
        script = MockDirectoryImportScript(self._db, mock_filesystem)

        # Now _locate_file finds something on the mock filesystem, and
        # load_circulation_data loads it into a fully populated
        # CirculationData object.
        circulation = script.load_circulation_data(*args)
        eq_(identifier, circulation.primary_identifier(self._db))
        eq_(gutenberg, circulation.data_source(self._db))
        eq_("rights URI", circulation.default_rights_uri)

        # The CirculationData has an open-access link associated with it.
        [link] = circulation.links
        eq_(Hyperlink.OPEN_ACCESS_DOWNLOAD, link.rel)
        assert link.href.endswith(
            '/test.content.bucket/Gutenberg/Gutenberg+ID/2345/Name+of+book.epub'
        )
        eq_(Representation.EPUB_MEDIA_TYPE, link.media_type)
        eq_("I'm an EPUB.", link.content)

        # This open-access link will be made available through a
        # delivery mechanism described by this FormatData.
        [format] = circulation.formats
        eq_(link, format.link)
        eq_(link.media_type, format.content_type)
        eq_(DeliveryMechanism.NO_DRM, format.drm_scheme)
Example #31
 def source(self):
     return DataSource.lookup(self._db, DataSource.OCLC_LINKED_DATA)
Example #32
 def __init__(self, db, uploader=None):
     self._db = db
     self.data_source = DataSource.lookup(self._db, self.DATA_SOURCE)
     self.uploader = uploader or S3Uploader.from_config(self._db)
     self.log = logging.getLogger("Cover Image Mirror")
Example #33
    def test_import(self):
        feed = self.get_data("biblioboard_mini_feed.opds")

        data_source = DataSource.lookup(self._db,
                                        "Biblioboard",
                                        autocreate=True)
        collection = MockOPDSForDistributorsAPI.mock_collection(self._db)
        collection.external_integration.set_setting(
            Collection.DATA_SOURCE_NAME_SETTING, data_source.name)

        class MockMetadataClient(object):
            def canonicalize_author_name(self, identifier,
                                         working_display_name):
                return working_display_name

        metadata_client = MockMetadataClient()
        importer = OPDSForDistributorsImporter(
            self._db,
            collection=collection,
            metadata_client=metadata_client,
        )

        imported_editions, imported_pools, imported_works, failures = (
            importer.import_from_feed(feed))

        # This importer works the same as the base OPDSImporter, except that
        # it adds delivery mechanisms for books with epub acquisition links
        # and sets pools' licenses_owned and licenses_available.

        # Both works were created, since we can use their acquisition links
        # to give copies to patrons.
        [camelot, southern] = sorted(imported_works, key=lambda x: x.title)

        # Each work has a license pool.
        [camelot_pool] = camelot.license_pools
        [southern_pool] = southern.license_pools
        now = datetime.datetime.utcnow()

        for pool in [camelot_pool, southern_pool]:
            eq_(False, pool.open_access)
            eq_(RightsStatus.IN_COPYRIGHT,
                pool.delivery_mechanisms[0].rights_status.uri)
            eq_(Representation.EPUB_MEDIA_TYPE,
                pool.delivery_mechanisms[0].delivery_mechanism.content_type)
            eq_(DeliveryMechanism.BEARER_TOKEN,
                pool.delivery_mechanisms[0].delivery_mechanism.drm_scheme)
            eq_(1, pool.licenses_owned)
            eq_(1, pool.licenses_available)
            assert (pool.work.last_update_time - now).total_seconds() <= 2

        [camelot_acquisition_link] = [
            l for l in camelot_pool.identifier.links
            if l.rel == Hyperlink.GENERIC_OPDS_ACQUISITION
            and l.resource.representation.media_type == Representation.EPUB_MEDIA_TYPE
        ]
        camelot_acquisition_url = camelot_acquisition_link.resource.representation.url
        eq_(
            "https://library.biblioboard.com/ext/api/media/04377e87-ab69-41c8-a2a4-812d55dc0952/assets/content.epub",
            camelot_acquisition_url)

        [southern_acquisition_link] = [
            l for l in southern_pool.identifier.links
            if l.rel == Hyperlink.GENERIC_OPDS_ACQUISITION
            and l.resource.representation.media_type == Representation.EPUB_MEDIA_TYPE
        ]
        southern_acquisition_url = southern_acquisition_link.resource.representation.url
        eq_(
            "https://library.biblioboard.com/ext/api/media/04da95cd-6cfc-4e82-810f-121d418b6963/assets/content.epub",
            southern_acquisition_url)
Example #34
    def change_book_cover(self, identifier_type, identifier, mirrors=None):
        """Save a new book cover based on the submitted form."""
        self.require_librarian(flask.request.library)

        data_source = DataSource.lookup(self._db, DataSource.LIBRARY_STAFF)

        work = self.load_work(flask.request.library, identifier_type,
                              identifier)
        if isinstance(work, ProblemDetail):
            return work

        rights_uri = flask.request.form.get("rights_status")
        rights_explanation = flask.request.form.get("rights_explanation")

        if not rights_uri:
            return INVALID_IMAGE.detailed(
                _("You must specify the image's license."))

        collection = self._get_collection_from_pools(identifier_type,
                                                     identifier)
        if isinstance(collection, ProblemDetail):
            return collection

        # Look for an appropriate mirror to store this cover image. Since the
        # mirror should be used for covers, we don't need a mirror for books.
        mirrors = mirrors or dict(covers_mirror=MirrorUploader.for_collection(
            collection, ExternalIntegrationLink.COVERS),
                                  books_mirror=None)
        if not mirrors.get(ExternalIntegrationLink.COVERS):
            return INVALID_CONFIGURATION_OPTION.detailed(
                _("Could not find a storage integration for uploading the cover."
                  ))

        image = self.generate_cover_image(work, identifier_type, identifier)
        if isinstance(image, ProblemDetail):
            return image

        original, derivation_settings, cover_href, cover_rights_explanation = self._original_cover_info(
            image, work, data_source, rights_uri, rights_explanation)

        buffer = StringIO()
        image.save(buffer, format="PNG")
        content = buffer.getvalue()

        if not cover_href:
            cover_href = Hyperlink.generic_uri(
                data_source,
                work.presentation_edition.primary_identifier,
                Hyperlink.IMAGE,
                content=content)

        cover_data = LinkData(
            Hyperlink.IMAGE,
            href=cover_href,
            media_type=Representation.PNG_MEDIA_TYPE,
            content=content,
            rights_uri=rights_uri,
            rights_explanation=cover_rights_explanation,
            original=original,
            transformation_settings=derivation_settings,
        )

        presentation_policy = PresentationCalculationPolicy(
            choose_edition=False,
            set_edition_metadata=False,
            classify=False,
            choose_summary=False,
            calculate_quality=False,
            choose_cover=True,
            regenerate_opds_entries=True,
            regenerate_marc_record=True,
            update_search_index=False,
        )

        replacement_policy = ReplacementPolicy(
            links=True,
            # link_content is false because we already have the content.
            # We don't want the metadata layer to try to fetch it again.
            link_content=False,
            mirrors=mirrors,
            presentation_calculation_policy=presentation_policy,
        )

        metadata = Metadata(data_source, links=[cover_data])
        metadata.apply(work.presentation_edition,
                       collection,
                       replace=replacement_policy)

        # metadata.apply only updates the edition, so we also need
        # to update the work.
        work.calculate_presentation(policy=presentation_policy)

        return Response(_("Success"), 200)
Example #35
    def setup_method(self):
        super(TestProQuestCredentialManager, self).setup_method()

        self._data_source = DataSource.lookup(
            self._db, DataSource.PROQUEST, autocreate=True
        )
Example #36
    def edit_classifications(self, identifier_type, identifier):
        """Edit a work's audience, target age, fiction status, and genres."""
        self.require_librarian(flask.request.library)

        work = self.load_work(flask.request.library, identifier_type,
                              identifier)
        if isinstance(work, ProblemDetail):
            return work

        staff_data_source = DataSource.lookup(self._db,
                                              DataSource.LIBRARY_STAFF)

        # Previous staff classifications
        primary_identifier = work.presentation_edition.primary_identifier
        old_classifications = self._db \
            .query(Classification) \
            .join(Subject) \
            .filter(
                Classification.identifier == primary_identifier,
                Classification.data_source == staff_data_source
            )
        old_genre_classifications = old_classifications \
            .filter(Subject.genre_id != None)
        old_staff_genres = [
            c.subject.genre.name for c in old_genre_classifications
            if c.subject.genre
        ]
        old_computed_genres = [
            work_genre.genre.name for work_genre in work.work_genres
        ]

        # New genres should be compared to previously computed genres
        new_genres = flask.request.form.getlist("genres")
        genres_changed = sorted(new_genres) != sorted(old_computed_genres)

        # Update audience
        new_audience = flask.request.form.get("audience")
        if new_audience != work.audience:
            # Delete all previous staff audience classifications
            for c in old_classifications:
                if c.subject.type == Subject.FREEFORM_AUDIENCE:
                    self._db.delete(c)

            # Create a new classification with a high weight
            primary_identifier.classify(
                data_source=staff_data_source,
                subject_type=Subject.FREEFORM_AUDIENCE,
                subject_identifier=new_audience,
                weight=WorkController.STAFF_WEIGHT,
            )

        # Update target age if present
        new_target_age_min = flask.request.form.get("target_age_min")
        new_target_age_min = int(
            new_target_age_min) if new_target_age_min else None
        new_target_age_max = flask.request.form.get("target_age_max")
        new_target_age_max = int(
            new_target_age_max) if new_target_age_max else None
        if new_target_age_max < new_target_age_min:
            return INVALID_EDIT.detailed(
                _("Minimum target age must be less than maximum target age."))

        if work.target_age:
            old_target_age_min = work.target_age.lower
            old_target_age_max = work.target_age.upper
        else:
            old_target_age_min = None
            old_target_age_max = None
        if new_target_age_min != old_target_age_min or new_target_age_max != old_target_age_max:
            # Delete all previous staff target age classifications
            for c in old_classifications:
                if c.subject.type == Subject.AGE_RANGE:
                    self._db.delete(c)

            # Create a new classification with a high weight - higher than audience
            if new_target_age_min and new_target_age_max:
                age_range_identifier = "%s-%s" % (new_target_age_min,
                                                  new_target_age_max)
                primary_identifier.classify(
                    data_source=staff_data_source,
                    subject_type=Subject.AGE_RANGE,
                    subject_identifier=age_range_identifier,
                    weight=WorkController.STAFF_WEIGHT * 100,
                )

        # Update fiction status
        # If fiction status hasn't changed but genres have changed,
        # we still want to ensure that there's a staff classification
        new_fiction = flask.request.form.get("fiction") == "fiction"
        if new_fiction != work.fiction or genres_changed:
            # Delete previous staff fiction classifications
            for c in old_classifications:
                if c.subject.type == Subject.SIMPLIFIED_FICTION_STATUS:
                    self._db.delete(c)

            # Create a new classification with a high weight (higher than genre)
            fiction_term = "Fiction" if new_fiction else "Nonfiction"
            classification = primary_identifier.classify(
                data_source=staff_data_source,
                subject_type=Subject.SIMPLIFIED_FICTION_STATUS,
                subject_identifier=fiction_term,
                weight=WorkController.STAFF_WEIGHT,
            )
            classification.subject.fiction = new_fiction

        # Update genres
        # make sure all new genres are legit
        for name in new_genres:
            genre, is_new = Genre.lookup(self._db, name)
            if not isinstance(genre, Genre):
                return GENRE_NOT_FOUND
            if genres[name].is_fiction is not None and genres[
                    name].is_fiction != new_fiction:
                return INCOMPATIBLE_GENRE
            if name == "Erotica" and new_audience != "Adults Only":
                return EROTICA_FOR_ADULTS_ONLY

        if genres_changed:
            # delete existing staff classifications for genres that aren't being kept
            for c in old_genre_classifications:
                if c.subject.genre.name not in new_genres:
                    self._db.delete(c)

            # add new staff classifications for new genres
            for genre in new_genres:
                if genre not in old_staff_genres:
                    classification = primary_identifier.classify(
                        data_source=staff_data_source,
                        subject_type=Subject.SIMPLIFIED_GENRE,
                        subject_identifier=genre,
                        weight=WorkController.STAFF_WEIGHT)

            # add NONE genre classification if we aren't keeping any genres
            if len(new_genres) == 0:
                primary_identifier.classify(
                    data_source=staff_data_source,
                    subject_type=Subject.SIMPLIFIED_GENRE,
                    subject_identifier=SimplifiedGenreClassifier.NONE,
                    weight=WorkController.STAFF_WEIGHT)
            else:
                # otherwise delete existing NONE genre classification
                none_classifications = self._db \
                    .query(Classification) \
                    .join(Subject) \
                    .filter(
                        Classification.identifier == primary_identifier,
                        Subject.identifier == SimplifiedGenreClassifier.NONE
                    ) \
                    .all()
                for c in none_classifications:
                    self._db.delete(c)

        # Update presentation
        policy = PresentationCalculationPolicy(classify=True,
                                               regenerate_opds_entries=True,
                                               regenerate_marc_record=True,
                                               update_search_index=True)
        work.calculate_presentation(policy=policy)

        return Response("", 200)
Example #37
    def edit(self, identifier_type, identifier):
        """Edit a work's metadata."""
        self.require_librarian(flask.request.library)

        # TODO: It would be nice to use the metadata layer for this, but
        # this code handles empty values differently than other metadata
        # sources. When a staff member deletes a value, that indicates
        # they think it should be empty. This needs to be indicated in the
        # db so that it can overrule other data sources that set a value,
        # unlike other sources which set empty fields to None.

        work = self.load_work(flask.request.library, identifier_type,
                              identifier)
        if isinstance(work, ProblemDetail):
            return work

        changed = False

        staff_data_source = DataSource.lookup(self._db,
                                              DataSource.LIBRARY_STAFF)
        primary_identifier = work.presentation_edition.primary_identifier
        staff_edition, is_new = get_one_or_create(
            self._db,
            Edition,
            primary_identifier_id=primary_identifier.id,
            data_source_id=staff_data_source.id)
        self._db.expire(primary_identifier)

        new_title = flask.request.form.get("title")
        if new_title and work.title != new_title:
            staff_edition.title = unicode(new_title)
            changed = True

        new_subtitle = flask.request.form.get("subtitle")
        if work.subtitle != new_subtitle:
            if work.subtitle and not new_subtitle:
                new_subtitle = NO_VALUE
            staff_edition.subtitle = unicode(new_subtitle)
            changed = True

        # The form data includes roles and names for contributors in the same order.
        new_contributor_roles = flask.request.form.getlist("contributor-role")
        new_contributor_names = [
            unicode(n) for n in flask.request.form.getlist("contributor-name")
        ]
        # The first author in the form is considered the primary author, even
        # though there's no separate MARC code for that.
        for i, role in enumerate(new_contributor_roles):
            if role == Contributor.AUTHOR_ROLE:
                new_contributor_roles[i] = Contributor.PRIMARY_AUTHOR_ROLE
                break
        roles_and_names = zip(new_contributor_roles, new_contributor_names)

        # Remove any contributions that weren't in the form, and remove contributions
        # that already exist from the list so they won't be added again.
        deleted_contributions = False
        for contribution in staff_edition.contributions:
            if (contribution.role, contribution.contributor.display_name
                ) not in roles_and_names:
                self._db.delete(contribution)
                deleted_contributions = True
                changed = True
            else:
                roles_and_names.remove(
                    (contribution.role, contribution.contributor.display_name))
        if deleted_contributions:
            # Ensure the staff edition's contributions are up-to-date when
            # calculating the presentation edition later.
            self._db.refresh(staff_edition)

        # Any remaining roles and names are new contributions.
        for role, name in roles_and_names:
            # There may be one extra role at the end from the input for
            # adding a contributor, in which case it will have no
            # corresponding name and can be ignored.
            if name:
                if role not in Contributor.MARC_ROLE_CODES.keys():
                    self._db.rollback()
                    return UNKNOWN_ROLE.detailed(
                        _("Role %(role)s is not one of the known contributor roles.",
                          role=role))
                contributor = staff_edition.add_contributor(name=name,
                                                            roles=[role])
                contributor.display_name = name
                changed = True

        new_series = flask.request.form.get("series")
        if work.series != new_series:
            if work.series and not new_series:
                new_series = NO_VALUE
            staff_edition.series = unicode(new_series)
            changed = True

        new_series_position = flask.request.form.get("series_position")
        if new_series_position != None and new_series_position != '':
            try:
                new_series_position = int(new_series_position)
            except ValueError:
                self._db.rollback()
                return INVALID_SERIES_POSITION
        else:
            new_series_position = None
        if work.series_position != new_series_position:
            if work.series_position and new_series_position == None:
                new_series_position = NO_NUMBER
            staff_edition.series_position = new_series_position
            changed = True

        new_medium = flask.request.form.get("medium")
        if new_medium:
            if new_medium not in Edition.medium_to_additional_type.keys():
                self._db.rollback()
                return UNKNOWN_MEDIUM.detailed(
                    _("Medium %(medium)s is not one of the known media.",
                      medium=new_medium))
            staff_edition.medium = new_medium
            changed = True

        new_language = flask.request.form.get("language")
        if new_language != None and new_language != '':
            new_language = LanguageCodes.string_to_alpha_3(new_language)
            if not new_language:
                self._db.rollback()
                return UNKNOWN_LANGUAGE
        else:
            new_language = None
        if new_language != staff_edition.language:
            staff_edition.language = new_language
            changed = True

        new_publisher = flask.request.form.get("publisher")
        if new_publisher != staff_edition.publisher:
            if staff_edition.publisher and not new_publisher:
                new_publisher = NO_VALUE
            staff_edition.publisher = unicode(new_publisher)
            changed = True

        new_imprint = flask.request.form.get("imprint")
        if new_imprint != staff_edition.imprint:
            if staff_edition.imprint and not new_imprint:
                new_imprint = NO_VALUE
            staff_edition.imprint = unicode(new_imprint)
            changed = True

        new_issued = flask.request.form.get("issued")
        if new_issued != None and new_issued != '':
            try:
                new_issued = datetime.strptime(new_issued, '%Y-%m-%d')
            except ValueError:
                self._db.rollback()
                return INVALID_DATE_FORMAT
        else:
            new_issued = None
        if new_issued != staff_edition.issued:
            staff_edition.issued = new_issued
            changed = True

        # TODO: This lets library staff add a 1-5 rating, which is used in the
        # quality calculation. However, this doesn't work well if there are any
        # other measurements that contribute to the quality. The form will show
        # the calculated quality rather than the staff rating, which will be
        # confusing. It might also be useful to make it more clear how this
        # relates to the quality threshold in the library settings.
        changed_rating = False
        new_rating = flask.request.form.get("rating")
        if new_rating != None and new_rating != '':
            try:
                new_rating = float(new_rating)
            except ValueError:
                self._db.rollback()
                return INVALID_RATING
            scale = Measurement.RATING_SCALES[DataSource.LIBRARY_STAFF]
            if new_rating < scale[0] or new_rating > scale[1]:
                self._db.rollback()
                return INVALID_RATING.detailed(
                    _("The rating must be a number between %(low)s and %(high)s.",
                      low=scale[0],
                      high=scale[1]))
            if (new_rating - scale[0]) / (scale[1] - scale[0]) != work.quality:
                primary_identifier.add_measurement(
                    staff_data_source,
                    Measurement.RATING,
                    new_rating,
                    weight=WorkController.STAFF_WEIGHT)
                changed = True
                changed_rating = True

        changed_summary = False
        new_summary = flask.request.form.get("summary") or ""
        if new_summary != work.summary_text:
            old_summary = None
            if work.summary and work.summary.data_source == staff_data_source:
                old_summary = work.summary

            work.presentation_edition.primary_identifier.add_link(
                Hyperlink.DESCRIPTION,
                None,
                staff_data_source,
                content=new_summary)

            # Delete previous staff summary
            if old_summary:
                for link in old_summary.links:
                    self._db.delete(link)
                self._db.delete(old_summary)

            changed = True
            changed_summary = True

        if changed:
            # Even if the presentation doesn't visibly change, we want
            # to regenerate the OPDS entries and update the search
            # index for the work, because that might be the 'real'
            # problem the user is trying to fix.
            policy = PresentationCalculationPolicy(
                classify=True,
                regenerate_opds_entries=True,
                regenerate_marc_record=True,
                update_search_index=True,
                calculate_quality=changed_rating,
                choose_summary=changed_summary,
            )
            work.calculate_presentation(policy=policy)

        return Response("", 200)
Exemplo n.º 38
0
    def data_source(self):
        return DataSource.lookup(self._db, DataSource.ADOBE)
Exemplo n.º 39
0
    def test_create_record(self):
        work = self._work(
            with_license_pool=True,
            title="old title",
            authors=["old author"],
            data_source_name=DataSource.OVERDRIVE,
        )
        annotator = Annotator()

        # The record isn't cached yet, so a new record is created and cached.
        assert None == work.marc_record
        record = MARCExporter.create_record(work, annotator)
        [title_field] = record.get_fields("245")
        assert "old title" == title_field.get_subfields("a")[0]
        [author_field] = record.get_fields("100")
        assert "author, old" == author_field.get_subfields("a")[0]
        [distributor_field] = record.get_fields("264")
        assert DataSource.OVERDRIVE == distributor_field.get_subfields("b")[0]
        cached = work.marc_record
        assert "old title" in cached
        assert "author, old" in cached
        # The distributor isn't part of the cached record.
        assert DataSource.OVERDRIVE not in cached

        work.presentation_edition.title = "new title"
        work.presentation_edition.sort_author = "author, new"
        new_data_source = DataSource.lookup(self._db, DataSource.BIBLIOTHECA)
        work.license_pools[0].data_source = new_data_source

        # Now that the record is cached, creating a record will
        # use the cache. Distributor will be updated since it's
        # not part of the cached record.
        record = MARCExporter.create_record(work, annotator)
        [title_field] = record.get_fields("245")
        assert "old title" == title_field.get_subfields("a")[0]
        [author_field] = record.get_fields("100")
        assert "author, old" == author_field.get_subfields("a")[0]
        [distributor_field] = record.get_fields("264")
        assert DataSource.BIBLIOTHECA == distributor_field.get_subfields(
            "b")[0]

        # But we can force an update to the cached record.
        record = MARCExporter.create_record(work, annotator, force_create=True)
        [title_field] = record.get_fields("245")
        assert "new title" == title_field.get_subfields("a")[0]
        [author_field] = record.get_fields("100")
        assert "author, new" == author_field.get_subfields("a")[0]
        [distributor_field] = record.get_fields("264")
        assert DataSource.BIBLIOTHECA == distributor_field.get_subfields(
            "b")[0]
        cached = work.marc_record
        assert "old title" not in cached
        assert "author, old" not in cached
        assert "new title" in cached
        assert "author, new" in cached

        # If we pass in an integration, it's passed along to the annotator.
        integration = self._integration()

        class MockAnnotator(Annotator):
            integration = None

            def annotate_work_record(self, work, pool, edition, identifier,
                                     record, integration):
                self.integration = integration

        annotator = MockAnnotator()
        record = MARCExporter.create_record(work,
                                            annotator,
                                            integration=integration)
        assert integration == annotator.integration
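The caching behavior this test exercises follows a common pattern: stable
bibliographic fields are cached once per work, while volatile fields such as
the distributor are re-applied on every call. A plain-Python sketch of that
pattern (illustrative names, not the real MARCExporter API):

_cache = {}

def create_record(work_id, title, distributor, force_create=False):
    if force_create or work_id not in _cache:
        _cache[work_id] = {"title": title}      # cached, stable part
    record = dict(_cache[work_id])              # copy the cached base
    record["distributor"] = distributor         # applied fresh on every call
    return record

first = create_record(1, "old title", "Overdrive")
stale = create_record(1, "new title", "Bibliotheca")
assert stale["title"] == "old title"            # the cache wins for the title
assert stale["distributor"] == "Bibliotheca"    # the distributor is updated
fresh = create_record(1, "new title", "Bibliotheca", force_create=True)
assert fresh["title"] == "new title"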
Exemplo n.º 40
0
    def source(self):
        return DataSource.lookup(self._db, DataSource.ODILO)
Exemplo n.º 41
0
    def source(self):
        return DataSource.lookup(self._db, DataSource.NYT)
Exemplo n.º 42
0
    def test_create_lanes_for_large_collection(self):
        languages = ['eng', 'spa']
        create_lanes_for_large_collection(self._db, self._default_library,
                                          languages)
        lanes = self._db.query(Lane).filter(Lane.parent_id == None).order_by(
            Lane.priority).all()

        # We have five top-level lanes.
        eq_(5, len(lanes))
        eq_([
            'Fiction', 'Nonfiction', 'Young Adult Fiction',
            'Young Adult Nonfiction', 'Children and Middle Grade'
        ], [x.display_name for x in lanes])
        for lane in lanes:
            eq_(self._default_library, lane.library)
            # They all are restricted to English and Spanish.
            eq_(lane.languages, languages)

            # They have no restrictions on media type -- that's handled
            # with entry points.
            eq_(None, lane.media)

        # The Adult Fiction and Adult Nonfiction lanes reproduce the
        # genre structure found in the genre definitions.
        fiction, nonfiction = lanes[0:2]
        [sf] = [
            x for x in fiction.sublanes if 'Science Fiction' in x.display_name
        ]
        [periodicals] = [
            x for x in nonfiction.sublanes if 'Periodicals' in x.display_name
        ]
        eq_(True, sf.fiction)
        eq_("Science Fiction", sf.display_name)
        assert 'Science Fiction' in [genre.name for genre in sf.genres]

        [nonfiction_humor] = [
            x for x in nonfiction.sublanes if 'Humor' in x.display_name
        ]
        eq_(False, nonfiction_humor.fiction)

        [fiction_humor] = [
            x for x in fiction.sublanes if 'Humor' in x.display_name
        ]
        eq_(True, fiction_humor.fiction)

        [space_opera] = [
            x for x in sf.sublanes if 'Space Opera' in x.display_name
        ]
        eq_(True, space_opera.fiction)
        eq_("Space Opera", space_opera.display_name)
        eq_(["Space Opera"], [genre.name for genre in space_opera.genres])

        [history] = [
            x for x in nonfiction.sublanes if 'History' in x.display_name
        ]
        eq_(False, history.fiction)
        eq_("History", history.display_name)
        assert 'History' in [genre.name for genre in history.genres]
        [european_history] = [
            x for x in history.sublanes if 'European History' in x.display_name
        ]
        assert 'European History' in [
            genre.name for genre in european_history.genres
        ]

        # Delete existing lanes.
        for lane in self._db.query(Lane).filter(
                Lane.library_id == self._default_library.id):
            self._db.delete(lane)

        # If there's an NYT Best Sellers integration and we create the lanes again...
        integration, ignore = create(self._db,
                                     ExternalIntegration,
                                     goal=ExternalIntegration.METADATA_GOAL,
                                     protocol=ExternalIntegration.NYT)

        create_lanes_for_large_collection(self._db, self._default_library,
                                          languages)
        lanes = self._db.query(Lane).filter(Lane.parent_id == None).order_by(
            Lane.priority).all()

        # Now we have six top-level lanes, with best sellers at the beginning.
        eq_([
            u'Best Sellers', 'Fiction', 'Nonfiction', 'Young Adult Fiction',
            'Young Adult Nonfiction', 'Children and Middle Grade'
        ], [x.display_name for x in lanes])

        # Each sublane other than best sellers also contains a best sellers lane.
        for sublane in lanes[1:]:
            best_sellers = sublane.visible_children[0]
            eq_("Best Sellers", best_sellers.display_name)

        # The best sellers lane has a data source.
        nyt_data_source = DataSource.lookup(self._db, DataSource.NYT)
        eq_(nyt_data_source, lanes[0].list_datasource)
Exemplo n.º 43
0
    def data_source(self):
        """Use the collection's name as the data source name."""
        return DataSource.lookup(self._db,
                                 self.collection.name,
                                 autocreate=True)
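A plain-dict sketch of the autocreate lookup pattern this method relies on:
the first lookup with autocreate=True creates the row, and later lookups
return the same object (hypothetical names, not the real DataSource model):

_sources = {}

def lookup(name, autocreate=False):
    if name not in _sources:
        if not autocreate:
            return None
        _sources[name] = {"name": name}     # created on first use
    return _sources[name]

assert lookup("My Collection") is None
src = lookup("My Collection", autocreate=True)
assert lookup("My Collection") is src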
Exemplo n.º 44
0
    def test_annotate_metadata(self):
        """Verify that annotate_metadata calls load_circulation_data
        and load_cover_link appropriately.
        """

        # First, test an unsuccessful annotation.
        class MockNoCirculationData(DirectoryImportScript):
            """Do nothing when load_circulation_data is called. Explode if
            load_cover_link is called.
            """
            def load_circulation_data(self, *args):
                self.load_circulation_data_args = args
                return None

            def load_cover_link(self, *args):
                raise Exception("Explode!")

        gutenberg = DataSource.lookup(self._db, DataSource.GUTENBERG)
        identifier = IdentifierData(Identifier.GUTENBERG_ID, "11111")
        identifier_obj, ignore = identifier.load(self._db)
        metadata = Metadata(
            title=self._str,
            data_source=gutenberg,
            primary_identifier=identifier
        )
        mirror = object()
        policy = ReplacementPolicy(mirror=mirror)
        cover_directory = object()
        ebook_directory = object()
        rights_uri = object()

        script = MockNoCirculationData(self._db)
        args = (metadata, policy, cover_directory, ebook_directory, rights_uri)
        script.annotate_metadata(*args)

        # load_circulation_data was called.
        eq_(
            (identifier_obj, gutenberg, ebook_directory, mirror,
             metadata.title, rights_uri),
            script.load_circulation_data_args
        )

        # But because load_circulation_data returned None,
        # metadata.circulation_data was not modified and
        # load_cover_link was not called (which would have raised an
        # exception).
        eq_(None, metadata.circulation)

        # Test a successful annotation with no cover image.
        class MockNoCoverLink(DirectoryImportScript):
            """Return an object when load_circulation_data is called.
            Do nothing when load_cover_link is called.
            """
            def load_circulation_data(self, *args):
                return "Some circulation data"

            def load_cover_link(self, *args):
                self.load_cover_link_args = args
                return None

        script = MockNoCoverLink(self._db)
        script.annotate_metadata(*args)

        # The Metadata object was annotated with the return value of
        # load_circulation_data.
        eq_("Some circulation data", metadata.circulation)

        # load_cover_link was called.
        eq_(
            (identifier_obj, gutenberg, cover_directory, mirror),
            script.load_cover_link_args
        )

        # But since it provided no cover link, metadata.links was empty.
        eq_([], metadata.links)

        # Finally, test a completely successful annotation.
        class MockWithCoverLink(DirectoryImportScript):
            """Mock success for both load_circulation_data
            and load_cover_link.
            """
            def load_circulation_data(self, *args):
                return "Some circulation data"

            def load_cover_link(self, *args):
                return "A cover link"

        metadata.circulation = None
        script = MockWithCoverLink(self._db)
        script.annotate_metadata(*args)

        eq_("Some circulation data", metadata.circulation)
        eq_(['A cover link'], metadata.links)
Exemplo n.º 45
0
    def _quality(self, value, weight=1):
        # The only source we recognize for quality scores is the metadata
        # wrangler.
        source = DataSource.lookup(self._db, DataSource.METADATA_WRANGLER)
        return self._measurement(Measurement.QUALITY, value, source, weight)
Exemplo n.º 46
0
    def edition_query(self):
        gutenberg = DataSource.lookup(self._db, DataSource.GUTENBERG)
        return self._db.query(Edition).filter(Edition.data_source == gutenberg)
Exemplo n.º 47
0
    def sync_bookshelf(self, patron, pin):

        # Get the external view of the patron's current state.
        remote_loans, remote_holds, complete = self.patron_activity(
            patron, pin)

        # Get our internal view of the patron's current state.
        __transaction = self._db.begin_nested()
        local_loans = self.local_loans(patron)
        local_holds = self.local_holds(patron)

        now = datetime.datetime.utcnow()
        local_loans_by_identifier = {}
        local_holds_by_identifier = {}
        for l in local_loans:
            if not l.license_pool:
                self.log.error("Active loan with no license pool!")
                continue
            i = l.license_pool.identifier
            if not i:
                self.log.error(
                    "Active loan on license pool %s, which has no identifier!",
                    l.license_pool)
                continue
            key = (i.type, i.identifier)
            local_loans_by_identifier[key] = l
        for h in local_holds:
            if not h.license_pool:
                self.log.error("Active hold with no license pool!")
                continue
            i = h.license_pool.identifier
            if not i:
                self.log.error(
                    "Active hold on license pool %r, which has no identifier!",
                    h.license_pool)
                continue
            key = (i.type, i.identifier)
            local_holds_by_identifier[key] = h

        active_loans = []
        active_holds = []
        for loan in remote_loans:
            # This is a remote loan. Find or create the corresponding
            # local loan.
            source_name = self.identifier_type_to_data_source_name[
                loan.identifier_type]
            source = DataSource.lookup(self._db, source_name)
            key = (loan.identifier_type, loan.identifier)
            pool, ignore = LicensePool.for_foreign_id(self._db, source,
                                                      loan.identifier_type,
                                                      loan.identifier)
            start = loan.start_date or now
            end = loan.end_date
            local_loan, new = pool.loan_to(patron, start, end)
            active_loans.append(local_loan)

            # Remove the local loan from the list so that we don't
            # delete it later.
            if key in local_loans_by_identifier:
                del local_loans_by_identifier[key]

        for hold in remote_holds:
            # This is a remote hold. Find or create the corresponding
            # local hold.
            key = (hold.identifier_type, hold.identifier)
            source_name = self.identifier_type_to_data_source_name[
                hold.identifier_type]
            source = DataSource.lookup(self._db, source_name)
            pool, ignore = LicensePool.for_foreign_id(self._db, source,
                                                      hold.identifier_type,
                                                      hold.identifier)
            start = hold.start_date or now
            end = hold.end_date
            position = hold.hold_position
            local_hold, new = pool.on_hold_to(patron, start, end, position)
            active_holds.append(local_hold)

            # Remove the local hold from the list so that we don't
            # delete it later.
            if key in local_holds_by_identifier:
                del local_holds_by_identifier[key]

        # We only want to delete local loans and holds if we were able to
        # successfully sync with all the providers. If there was an error,
        # the provider might still know about a loan or hold that we don't
        # have in the remote lists.
        if complete:
            # Every loan remaining in local_loans_by_identifier is a loan that
            # the provider doesn't know about. This usually means it's expired
            # and we should get rid of it, but it's possible the patron is
            # borrowing a book and syncing their bookshelf at the same time,
            # and the local loan was created after we got the remote loans.
            # If the loan's start date is less than a minute ago, we'll keep it.
            for loan in local_loans_by_identifier.values():
                if loan.license_pool.data_source.id in self.data_source_ids_for_sync:
                    one_minute_ago = (datetime.datetime.utcnow() -
                                      datetime.timedelta(minutes=1))
                    if loan.start < one_minute_ago:
                        logging.info(
                            "In sync_bookshelf for patron %s, deleting loan %d (patron %s)"
                            % (patron.authorization_identifier, loan.id,
                               loan.patron.authorization_identifier))
                        self._db.delete(loan)
                    else:
                        logging.info(
                            "In sync_bookshelf for patron %s, found local loan %d created in the past minute that wasn't in remote loans"
                            % (patron.authorization_identifier, loan.id))

            # Every hold remaining in local_holds_by_identifier is a hold that
            # the provider doesn't know about, which means it's expired
            # and we should get rid of it.
            for hold in local_holds_by_identifier.values():
                if hold.license_pool.data_source.id in self.data_source_ids_for_sync:
                    self._db.delete(hold)

        __transaction.commit()
        return active_loans, active_holds
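sync_bookshelf is an instance of a general reconciliation pattern: index the
local records by (identifier_type, identifier), consume matches as remote
records arrive, and treat whatever is left over as stale. A minimal,
self-contained sketch using illustrative tuples rather than the real
Loan/Hold models:

def reconcile(remote, local):
    local_by_key = {(r[0], r[1]): r for r in local}
    active = []
    for r in remote:
        key = (r[0], r[1])
        active.append(local_by_key.pop(key, r))  # reuse a match or keep remote
    stale = list(local_by_key.values())          # unmatched local records
    return active, stale

remote = [("ISBN", "111"), ("ISBN", "222")]
local = [("ISBN", "222"), ("ISBN", "999")]       # "999" expired remotely
active, stale = reconcile(remote, local)
assert stale == [("ISBN", "999")]
assert ("ISBN", "222") in active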
Exemplo n.º 48
0
def create_lanes_for_large_collection(_db, library, languages, priority=0):
    """Ensure that the lanes appropriate to a large collection are all
    present.

    This means:

    * A "%(language)s Adult Fiction" lane containing sublanes for each fiction
    genre.

    * A "%(language)s Adult Nonfiction" lane containing sublanes for
    each nonfiction genre.

    * A "%(language)s YA Fiction" lane containing sublanes for the
      most popular YA fiction genres.

    * A "%(language)s YA Nonfiction" lane containing sublanes for the
      most popular YA fiction genres.

    * A "%(language)s Children and Middle Grade" lane containing
      sublanes for childrens' books at different age levels.

    :param library: Newly created lanes will be associated with this
        library.
    :param languages: Newly created lanes will contain only books
        in these languages.
    :return: A list of top-level Lane objects.

    TODO: If there are multiple large collections, their top-level lanes do
    not have distinct display names.
    """
    if isinstance(languages, basestring):
        languages = [languages]

    ADULT = Classifier.AUDIENCES_ADULT
    YA = [Classifier.AUDIENCE_YOUNG_ADULT]
    CHILDREN = [Classifier.AUDIENCE_CHILDREN]

    common_args = dict(languages=languages, media=None)
    adult_common_args = dict(common_args)
    adult_common_args['audiences'] = ADULT

    include_best_sellers = False
    nyt_data_source = DataSource.lookup(_db, DataSource.NYT)
    nyt_integration = get_one(
        _db,
        ExternalIntegration,
        goal=ExternalIntegration.METADATA_GOAL,
        protocol=ExternalIntegration.NYT,
    )
    if nyt_integration:
        include_best_sellers = True

    language_identifier = LanguageCodes.name_for_languageset(languages)

    sublanes = []
    if include_best_sellers:
        best_sellers, ignore = create(_db,
                                      Lane,
                                      library=library,
                                      display_name="Best Sellers",
                                      priority=priority,
                                      **common_args)
        priority += 1
        best_sellers.list_datasource = nyt_data_source
        sublanes.append(best_sellers)

    adult_fiction_sublanes = []
    adult_fiction_priority = 0
    if include_best_sellers:
        adult_fiction_best_sellers, ignore = create(
            _db,
            Lane,
            library=library,
            display_name="Best Sellers",
            fiction=True,
            priority=adult_fiction_priority,
            **adult_common_args)
        adult_fiction_priority += 1
        adult_fiction_best_sellers.list_datasource = nyt_data_source
        adult_fiction_sublanes.append(adult_fiction_best_sellers)

    for genre in fiction_genres:
        if isinstance(genre, basestring):
            genre_name = genre
        else:
            genre_name = genre.get("name")
        genre_lane = lane_from_genres(_db,
                                      library, [genre],
                                      priority=adult_fiction_priority,
                                      **adult_common_args)
        adult_fiction_priority += 1
        adult_fiction_sublanes.append(genre_lane)

    adult_fiction, ignore = create(_db,
                                   Lane,
                                   library=library,
                                   display_name="Fiction",
                                   genres=[],
                                   sublanes=adult_fiction_sublanes,
                                   fiction=True,
                                   priority=priority,
                                   **adult_common_args)
    priority += 1
    sublanes.append(adult_fiction)

    adult_nonfiction_sublanes = []
    adult_nonfiction_priority = 0
    if include_best_sellers:
        adult_nonfiction_best_sellers, ignore = create(
            _db,
            Lane,
            library=library,
            display_name="Best Sellers",
            fiction=False,
            priority=adult_nonfiction_priority,
            **adult_common_args)
        adult_nonfiction_priority += 1
        adult_nonfiction_best_sellers.list_datasource = nyt_data_source
        adult_nonfiction_sublanes.append(adult_nonfiction_best_sellers)

    for genre in nonfiction_genres:
        # "Life Strategies" is a YA-specific genre that should not be
        # included in the Adult Nonfiction lane.
        if genre != genres.Life_Strategies:
            if isinstance(genre, basestring):
                genre_name = genre
            else:
                genre_name = genre.get("name")
            genre_lane = lane_from_genres(_db,
                                          library, [genre],
                                          priority=adult_nonfiction_priority,
                                          **adult_common_args)
            adult_nonfiction_priority += 1
            adult_nonfiction_sublanes.append(genre_lane)

    adult_nonfiction, ignore = create(_db,
                                      Lane,
                                      library=library,
                                      display_name="Nonfiction",
                                      genres=[],
                                      sublanes=adult_nonfiction_sublanes,
                                      fiction=False,
                                      priority=priority,
                                      **adult_common_args)
    priority += 1
    sublanes.append(adult_nonfiction)

    ya_common_args = dict(common_args)
    ya_common_args['audiences'] = YA

    ya_fiction, ignore = create(_db,
                                Lane,
                                library=library,
                                display_name="Young Adult Fiction",
                                genres=[],
                                fiction=True,
                                sublanes=[],
                                priority=priority,
                                **ya_common_args)
    priority += 1
    sublanes.append(ya_fiction)

    ya_fiction_priority = 0
    if include_best_sellers:
        ya_fiction_best_sellers, ignore = create(_db,
                                                 Lane,
                                                 library=library,
                                                 display_name="Best Sellers",
                                                 fiction=True,
                                                 priority=ya_fiction_priority,
                                                 **ya_common_args)
        ya_fiction_priority += 1
        ya_fiction_best_sellers.list_datasource = nyt_data_source
        ya_fiction.sublanes.append(ya_fiction_best_sellers)

    ya_fiction.sublanes.append(
        lane_from_genres(_db,
                         library, [genres.Dystopian_SF],
                         priority=ya_fiction_priority,
                         **ya_common_args))
    ya_fiction_priority += 1
    ya_fiction.sublanes.append(
        lane_from_genres(_db,
                         library, [genres.Fantasy],
                         priority=ya_fiction_priority,
                         **ya_common_args))
    ya_fiction_priority += 1
    ya_fiction.sublanes.append(
        lane_from_genres(_db,
                         library, [genres.Comics_Graphic_Novels],
                         priority=ya_fiction_priority,
                         **ya_common_args))
    ya_fiction_priority += 1
    ya_fiction.sublanes.append(
        lane_from_genres(_db,
                         library, [genres.Literary_Fiction],
                         display_name="Contemporary Fiction",
                         priority=ya_fiction_priority,
                         **ya_common_args))
    ya_fiction_priority += 1
    ya_fiction.sublanes.append(
        lane_from_genres(_db,
                         library, [genres.LGBTQ_Fiction],
                         priority=ya_fiction_priority,
                         **ya_common_args))
    ya_fiction_priority += 1
    ya_fiction.sublanes.append(
        lane_from_genres(_db,
                         library, [genres.Suspense_Thriller, genres.Mystery],
                         display_name="Mystery & Thriller",
                         priority=ya_fiction_priority,
                         **ya_common_args))
    ya_fiction_priority += 1
    ya_fiction.sublanes.append(
        lane_from_genres(_db,
                         library, [genres.Romance],
                         priority=ya_fiction_priority,
                         **ya_common_args))
    ya_fiction_priority += 1
    ya_fiction.sublanes.append(
        lane_from_genres(
            _db,
            library, [genres.Science_Fiction],
            exclude_genres=[genres.Dystopian_SF, genres.Steampunk],
            priority=ya_fiction_priority,
            **ya_common_args))
    ya_fiction_priority += 1
    ya_fiction.sublanes.append(
        lane_from_genres(_db,
                         library, [genres.Steampunk],
                         priority=ya_fiction_priority,
                         **ya_common_args))
    ya_fiction_priority += 1

    ya_nonfiction, ignore = create(_db,
                                   Lane,
                                   library=library,
                                   display_name="Young Adult Nonfiction",
                                   genres=[],
                                   fiction=False,
                                   sublanes=[],
                                   priority=priority,
                                   **ya_common_args)
    priority += 1
    sublanes.append(ya_nonfiction)

    ya_nonfiction_priority = 0
    if include_best_sellers:
        ya_nonfiction_best_sellers, ignore = create(
            _db,
            Lane,
            library=library,
            display_name="Best Sellers",
            fiction=False,
            priority=ya_nonfiction_priority,
            **ya_common_args)
        ya_nonfiction_priority += 1
        ya_nonfiction_best_sellers.list_datasource = nyt_data_source
        ya_nonfiction.sublanes.append(ya_nonfiction_best_sellers)

    ya_nonfiction.sublanes.append(
        lane_from_genres(_db,
                         library, [genres.Biography_Memoir],
                         display_name="Biography",
                         priority=ya_nonfiction_priority,
                         **ya_common_args))
    ya_nonfiction_priority += 1
    ya_nonfiction.sublanes.append(
        lane_from_genres(_db,
                         library, [genres.History, genres.Social_Sciences],
                         display_name="History & Sociology",
                         priority=ya_nonfiction_priority,
                         **ya_common_args))
    ya_nonfiction_priority += 1
    ya_nonfiction.sublanes.append(
        lane_from_genres(_db,
                         library, [genres.Life_Strategies],
                         priority=ya_nonfiction_priority,
                         **ya_common_args))
    ya_nonfiction_priority += 1
    ya_nonfiction.sublanes.append(
        lane_from_genres(_db,
                         library, [genres.Religion_Spirituality],
                         priority=ya_nonfiction_priority,
                         **ya_common_args))
    ya_nonfiction_priority += 1

    children_common_args = dict(common_args)
    children_common_args['audiences'] = CHILDREN

    children, ignore = create(_db,
                              Lane,
                              library=library,
                              display_name="Children and Middle Grade",
                              genres=[],
                              fiction=None,
                              sublanes=[],
                              priority=priority,
                              **children_common_args)
    priority += 1
    sublanes.append(children)

    children_priority = 0
    if include_best_sellers:
        children_best_sellers, ignore = create(_db,
                                               Lane,
                                               library=library,
                                               display_name="Best Sellers",
                                               priority=children_priority,
                                               **children_common_args)
        children_priority += 1
        children_best_sellers.list_datasource = nyt_data_source
        children.sublanes.append(children_best_sellers)

    picture_books, ignore = create(
        _db,
        Lane,
        library=library,
        display_name="Picture Books",
        target_age=(0, 4),
        genres=[],
        fiction=None,
        priority=children_priority,
        languages=languages,
    )
    children_priority += 1
    children.sublanes.append(picture_books)

    easy_readers, ignore = create(
        _db,
        Lane,
        library=library,
        display_name="Easy Readers",
        target_age=(5, 8),
        genres=[],
        fiction=None,
        priority=children_priority,
        languages=languages,
    )
    children_priority += 1
    children.sublanes.append(easy_readers)

    chapter_books, ignore = create(
        _db,
        Lane,
        library=library,
        display_name="Chapter Books",
        target_age=(9, 12),
        genres=[],
        fiction=None,
        priority=children_priority,
        languages=languages,
    )
    children_priority += 1
    children.sublanes.append(chapter_books)

    children_poetry, ignore = create(_db,
                                     Lane,
                                     library=library,
                                     display_name="Poetry Books",
                                     priority=children_priority,
                                     **children_common_args)
    children_priority += 1
    children_poetry.add_genre(genres.Poetry.name)
    children.sublanes.append(children_poetry)

    children_folklore, ignore = create(_db,
                                       Lane,
                                       library=library,
                                       display_name="Folklore",
                                       priority=children_priority,
                                       **children_common_args)
    children_priority += 1
    children_folklore.add_genre(genres.Folklore.name)
    children.sublanes.append(children_folklore)

    children_fantasy, ignore = create(_db,
                                      Lane,
                                      library=library,
                                      display_name="Fantasy",
                                      fiction=True,
                                      priority=children_priority,
                                      **children_common_args)
    children_priority += 1
    children_fantasy.add_genre(genres.Fantasy.name)
    children.sublanes.append(children_fantasy)

    children_sf, ignore = create(_db,
                                 Lane,
                                 library=library,
                                 display_name="Science Fiction",
                                 fiction=True,
                                 priority=children_priority,
                                 **children_common_args)
    children_priority += 1
    children_sf.add_genre(genres.Science_Fiction.name)
    children.sublanes.append(children_sf)

    realistic_fiction, ignore = create(_db,
                                       Lane,
                                       library=library,
                                       display_name="Realistic Fiction",
                                       fiction=True,
                                       priority=children_priority,
                                       **children_common_args)
    children_priority += 1
    realistic_fiction.add_genre(genres.Literary_Fiction.name)
    children.sublanes.append(realistic_fiction)

    children_graphic_novels, ignore = create(
        _db,
        Lane,
        library=library,
        display_name="Comics & Graphic Novels",
        priority=children_priority,
        **children_common_args)
    children_priority += 1
    children_graphic_novels.add_genre(genres.Comics_Graphic_Novels.name)
    children.sublanes.append(children_graphic_novels)

    children_biography, ignore = create(_db,
                                        Lane,
                                        library=library,
                                        display_name="Biography",
                                        priority=children_priority,
                                        **children_common_args)
    children_priority += 1
    children_biography.add_genre(genres.Biography_Memoir.name)
    children.sublanes.append(children_biography)

    children_historical_fiction, ignore = create(
        _db,
        Lane,
        library=library,
        display_name="Historical Fiction",
        priority=children_priority,
        **children_common_args)
    children_priority += 1
    children_historical_fiction.add_genre(genres.Historical_Fiction.name)
    children.sublanes.append(children_historical_fiction)

    informational, ignore = create(_db,
                                   Lane,
                                   library=library,
                                   display_name="Informational Books",
                                   fiction=False,
                                   genres=[],
                                   priority=children_priority,
                                   **children_common_args)
    children_priority += 1
    informational.add_genre(genres.Biography_Memoir.name, inclusive=False)
    children.sublanes.append(informational)

    return priority
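The priority argument threads through every builder in this function: each
lane takes the next free priority, and the next free value is returned so a
caller can chain builders for several collections. A small sketch of that
convention (illustrative dicts, not the real Lane model):

def build_lanes(names, priority=0):
    lanes = []
    for name in names:
        lanes.append({"display_name": name, "priority": priority})
        priority += 1                     # consecutive priorities per lane
    return lanes, priority

lanes, next_priority = build_lanes(["Fiction", "Nonfiction"], priority=0)
assert [lane["priority"] for lane in lanes] == [0, 1]
assert next_priority == 2                 # a later builder starts here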
Exemplo n.º 49
0
    def extract_edition(cls, _db, work_tag, existing_authors, **restrictions):
        """Create a new Edition object with information about a
        work (identified by OCLC Work ID).
        """
        # TODO: 'pswid' is what it's called in older representations.
        # That code can be removed once we replace all representations.
        oclc_work_id = unicode(work_tag.get('owi') or work_tag.get('pswid'))

        if not oclc_work_id:
            raise ValueError("Work has no owi")

        item_type = work_tag.get("itemtype")
        if (item_type.startswith('itemtype-book')
                or item_type.startswith('itemtype-compfile')):
            medium = Edition.BOOK_MEDIUM
        elif (item_type.startswith('itemtype-audiobook')
              or item_type.startswith('itemtype-music')):
            # Pretty much all Gutenberg texts, even the audio texts,
            # are based on a book, and the ones that aren't
            # (recordings of individual songs) probably aren't in OCLC
            # anyway. So we just want to get the books.
            # medium = Edition.AUDIO_MEDIUM
            medium = None
        elif item_type.startswith('itemtype-video'):
            # medium = Edition.VIDEO_MEDIUM
            medium = None
        elif item_type in cls.UNUSED_MEDIA:
            medium = None
        else:
            medium = None

        # Only create Editions for books with a recognized medium
        if medium is None:
            return None, False

        result = cls._extract_basic_info(_db, work_tag, existing_authors,
                                         **restrictions)
        if not result:
            # This record did not meet one of the restrictions.
            return None, False

        title, authors_and_roles, language = result

        # Record some extra OCLC-specific information
        editions = work_tag.get('editions')
        holdings = work_tag.get('holdings')

        # Get an identifier for this work.
        identifier, ignore = Identifier.for_foreign_id(_db,
                                                       Identifier.OCLC_WORK,
                                                       oclc_work_id)

        data_source = DataSource.lookup(_db, DataSource.OCLC)
        identifier.add_measurement(data_source, Measurement.HOLDINGS, holdings)
        identifier.add_measurement(data_source, Measurement.PUBLISHED_EDITIONS,
                                   editions)

        # Create an Edition for source + identifier
        edition, new = get_one_or_create(_db,
                                         Edition,
                                         data_source=data_source,
                                         primary_identifier=identifier,
                                         create_method_kwargs=dict(
                                             title=title,
                                             language=language,
                                         ))

        # Get the most popular Dewey and LCC classification for this
        # work.
        for tag_name, subject_type in (
                ("ddc", Subject.DDC), ("lcc", Subject.LCC)):
            tag = cls._xpath1(work_tag,
                              "//oclc:%s/oclc:mostPopular" % tag_name)
            if tag is not None:
                id = tag.get('nsfa') or tag.get('sfa')
                weight = int(tag.get('holdings'))
                identifier.classify(data_source,
                                    subject_type,
                                    id,
                                    weight=weight)

        # Find FAST subjects for the work.
        for heading in cls._xpath(work_tag, "//oclc:fast//oclc:heading"):
            id = heading.get('ident')
            weight = int(heading.get('heldby'))
            value = heading.text
            identifier.classify(data_source, Subject.FAST, id, value, weight)

        # Associate the authors with the Edition.
        for contributor, roles in authors_and_roles:
            edition.add_contributor(contributor, roles)
        return edition, new
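A small runnable sketch of the namespaced XPath lookups performed above,
using an invented fragment of a classify response (the real OCLC document is
much larger):

from lxml import etree

xml = b"""<classify xmlns="http://classify.oclc.org">
  <ddc><mostPopular holdings="437" nsfa="823.912"/></ddc>
</classify>"""
tree = etree.fromstring(xml)
ns = {"oclc": "http://classify.oclc.org"}
# Find the most popular DDC classification and its holdings-based weight.
tag = tree.xpath("//oclc:ddc/oclc:mostPopular", namespaces=ns)[0]
assert tag.get("nsfa") == "823.912"
assert int(tag.get("holdings")) == 437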
Exemplo n.º 50
0
    def test_reaper(self):
        feed = self.get_data("biblioboard_mini_feed.opds")

        class MockOPDSForDistributorsReaperMonitor(
                OPDSForDistributorsReaperMonitor):
            """An OPDSForDistributorsReaperMonitor that overrides _get."""
            def _get(self, url, headers):
                return (200, {
                    "content-type": OPDSFeed.ACQUISITION_FEED_TYPE
                }, feed)

        data_source = DataSource.lookup(self._db,
                                        "Biblioboard",
                                        autocreate=True)
        collection = MockOPDSForDistributorsAPI.mock_collection(self._db)
        collection.external_integration.set_setting(
            Collection.DATA_SOURCE_NAME_SETTING, data_source.name)
        monitor = MockOPDSForDistributorsReaperMonitor(
            self._db,
            collection,
            OPDSForDistributorsImporter,
            metadata_client=object())

        # There's a license pool in the database that isn't in the feed anymore.
        edition, now_gone = self._edition(
            identifier_type=Identifier.URI,
            data_source_name=data_source.name,
            with_license_pool=True,
            collection=collection,
        )
        now_gone.licenses_owned = 1
        now_gone.licenses_available = 1

        edition, still_there = self._edition(
            identifier_type=Identifier.URI,
            identifier_id="urn:uuid:04377e87-ab69-41c8-a2a4-812d55dc0952",
            data_source_name=data_source.name,
            with_license_pool=True,
            collection=collection,
        )
        still_there.licenses_owned = 1
        still_there.licenses_available = 1

        progress = monitor.run_once(monitor.timestamp().to_data())

        # One LicensePool has been cleared out.
        assert 0 == now_gone.licenses_owned
        assert 0 == now_gone.licenses_available

        # The other is still around.
        assert 1 == still_there.licenses_owned
        assert 1 == still_there.licenses_available

        # The TimestampData returned by run_once() describes its
        # achievements.
        assert "License pools removed: 1." == progress.achievements

        # The TimestampData does not include any timing information --
        # that will be applied by run().
        assert None == progress.start
        assert None == progress.finish
Exemplo n.º 51
0
    def test_process_batch(self):
        provider = self._provider()

        # Here are an Edition and a LicensePool for the same identifier but
        # from different data sources. We would expect this to happen
        # when talking to the open-access content server.
        edition = self._edition(data_source_name=DataSource.OA_CONTENT_SERVER)
        identifier = edition.primary_identifier

        license_source = DataSource.lookup(self._db, DataSource.GUTENBERG)
        pool, is_new = LicensePool.for_foreign_id(
            self._db,
            license_source,
            identifier.type,
            identifier.identifier,
            collection=self._default_collection)
        eq_(None, pool.work)

        # Here's a second Edition/LicensePool that's going to cause a
        # problem: the LicensePool will show up in the results, but
        # the corresponding Edition will not.
        edition2, pool2 = self._edition(with_license_pool=True)

        # Here's an identifier that can't be looked up at all.
        identifier = self._identifier()
        messages_by_id = {
            identifier.urn: CoverageFailure(identifier, "201: try again later")
        }

        # When we call CoverageProvider.process_batch(), it's going to
        # return the information we just set up: a matched
        # Edition/LicensePool pair, a mismatched LicensePool, and an
        # error message.
        provider.queue_import_results([edition], [pool, pool2], [],
                                      messages_by_id)

        # Make the CoverageProvider do its thing.
        fake_batch = [object()]
        success, failure1, failure2 = provider.process_batch(fake_batch)

        # The fake batch was provided to lookup_and_import_batch.
        eq_([fake_batch], provider.batches)

        # The matched Edition/LicensePool pair was returned.
        eq_(success, edition.primary_identifier)

        # The LicensePool of that pair was passed into finalize_license_pool.
        # The mismatched LicensePool was not.
        eq_([pool], provider.finalized)

        # The mismatched LicensePool turned into a CoverageFailure
        # object.
        assert isinstance(failure1, CoverageFailure)
        eq_('OPDS import operation imported LicensePool, but no Edition.',
            failure1.exception)
        eq_(pool2.identifier, failure1.obj)
        eq_(True, failure1.transient)

        # The failure was returned as a CoverageFailure object.
        assert isinstance(failure2, CoverageFailure)
        eq_(identifier, failure2.obj)
        eq_(True, failure2.transient)
Exemplo n.º 52
0
    def test_work_from_metadata(self):
        """Validate the ability to create a new Work from appropriate metadata.
        """

        class Mock(MockDirectoryImportScript):
            """In this test we need to verify that annotate_metadata
            was called but did nothing.
            """
            def annotate_metadata(self, metadata, *args, **kwargs):
                metadata.annotated = True
                return super(Mock, self).annotate_metadata(
                    metadata, *args, **kwargs
                )

        identifier = IdentifierData(Identifier.GUTENBERG_ID, "1003")
        identifier_obj, ignore = identifier.load(self._db)
        metadata = Metadata(
            DataSource.GUTENBERG,
            primary_identifier=identifier,
            title=u"A book"
        )
        metadata.annotated = False
        datasource = DataSource.lookup(self._db, DataSource.GUTENBERG)
        policy = ReplacementPolicy.from_license_source(self._db)
        mirror = MockS3Uploader()
        policy.mirror = mirror

        # Here, work_from_metadata calls annotate_metadata, but does
        # not actually import anything because there are no files 'on
        # disk' and thus no way to actually get the book.
        collection = self._default_collection
        args = (collection, metadata, policy, "cover directory",
                "ebook directory", RightsStatus.CC0)
        script = Mock(self._db)
        eq_(None, script.work_from_metadata(*args))
        eq_(True, metadata.annotated)

        # Now let's try it with some files 'on disk'.
        with open(self.sample_cover_path('test-book-cover.png')) as fh:
            image = fh.read()
        mock_filesystem = {
            'cover directory' : (
                'cover.jpg', Representation.JPEG_MEDIA_TYPE, image
            ),
            'ebook directory' : (
                'book.epub', Representation.EPUB_MEDIA_TYPE, "I'm an EPUB."
            )
        }
        script = MockDirectoryImportScript(
            self._db, mock_filesystem=mock_filesystem
        )
        work = script.work_from_metadata(*args)

        # We have created a book. It has a cover image, which has a
        # thumbnail.
        eq_("A book", work.title)
        assert work.cover_full_url.endswith(
            '/test.cover.bucket/Gutenberg/Gutenberg+ID/1003/1003.jpg'
        )
        assert work.cover_thumbnail_url.endswith(
            '/test.cover.bucket/scaled/300/Gutenberg/Gutenberg+ID/1003/1003.png'
        )
        [pool] = work.license_pools
        assert pool.open_access_download_url.endswith(
            '/test.content.bucket/Gutenberg/Gutenberg+ID/1003/A+book.epub'
        )

        eq_(RightsStatus.CC0,
            pool.delivery_mechanisms[0].rights_status.uri)

        # The mock S3Uploader has a record of 'uploading' all these files
        # to S3.
        epub, full, thumbnail = mirror.uploaded
        eq_(epub.url, pool.open_access_download_url)
        eq_(full.url, work.cover_full_url)
        eq_(thumbnail.url, work.cover_thumbnail_url)

        # The EPUB Representation was cleared out after the upload, to
        # save database space.
        eq_("I'm an EPUB.", mirror.content[0])
        eq_(None, epub.content)
Exemplo n.º 53
0
class OCLCXMLParser(XMLParser):

    # OCLC in-representation 'status codes'
    SINGLE_WORK_SUMMARY_STATUS = 0
    SINGLE_WORK_DETAIL_STATUS = 2
    MULTI_WORK_STATUS = 4
    NO_INPUT_STATUS = 100
    INVALID_INPUT_STATUS = 101
    NOT_FOUND_STATUS = 102
    UNEXPECTED_ERROR_STATUS = 200

    INTS = set([OCLC.HOLDING_COUNT, OCLC.EDITION_COUNT])

    NAMESPACES = {'oclc': 'http://classify.oclc.org'}

    LIST_TYPE = "works"
    log = logging.getLogger("OCLC XML Parser")

    @classmethod
    def parse(cls, _db, xml, **restrictions):
        """Turn XML data from the OCLC lookup service into a list of SWIDs
        (for a multi-work response) or a list of Edition
        objects (for a single-work response).
        """
        tree = etree.fromstring(xml, parser=etree.XMLParser(recover=True))
        response = cls._xpath1(tree, "oclc:response")
        representation_type = int(response.get('code'))

        workset_record = None
        editions = []
        edition_records = []

        if representation_type == cls.UNEXPECTED_ERROR_STATUS:
            raise IOError("Unexpected error from OCLC API: %s" % xml)
        elif representation_type in (cls.NO_INPUT_STATUS,
                                     cls.INVALID_INPUT_STATUS):
            return representation_type, []
        elif representation_type == cls.SINGLE_WORK_SUMMARY_STATUS:
            raise IOError(
                "Got single-work summary from OCLC despite requesting detail: %s"
                % xml)

        # The real action happens here.
        if representation_type == cls.SINGLE_WORK_DETAIL_STATUS:
            authors_tag = cls._xpath1(tree, "//oclc:authors")

            primary_author = None
            work_tag = cls._xpath1(tree, "//oclc:work")
            if work_tag is not None:
                author_string = work_tag.get('author')
                primary_author = cls.primary_author_from_author_string(
                    _db, author_string)

            existing_authors = cls.extract_authors(
                _db, authors_tag, primary_author=primary_author)

            # The representation lists a single work, its authors, its editions,
            # plus summary classification information for the work.
            edition, ignore = cls.extract_edition(_db, work_tag,
                                                  existing_authors,
                                                  **restrictions)
            records = []
            if edition:
                cls.log.info("EXTRACTED %r", edition)
                records.append(edition)
            else:
                # The work record itself failed one of the
                # restrictions. None of its editions are likely to
                # succeed either.
                return representation_type, records

        elif representation_type == cls.MULTI_WORK_STATUS:
            # The representation lists a set of works that match the
            # search query.
            cls.log.debug("Extracting SWIDs from search results.")
            records = cls.extract_swids(_db, tree, **restrictions)
        elif representation_type == cls.NOT_FOUND_STATUS:
            # No problem; OCLC just doesn't have any data.
            records = []
        else:
            raise IOError("Unrecognized status code from OCLC API: %s (%s)" %
                          (representation_type, xml))

        return representation_type, records

    @classmethod
    def extract_swids(cls, _db, tree, **restrictions):
        """Turn a multi-work response into a list of SWIDs."""

        swids = []
        for work_tag in cls._xpath(tree, "//oclc:work"):
            # We're not calling extract_basic_info because we care about
            # the info, we're calling it to make sure this work meets
            # the restriction. If this work meets the restriction,
            # we'll store its info when we look up the SWID.
            response = cls._extract_basic_info(_db, work_tag, **restrictions)
            if response:
                title, author_names, language = response
                # TODO: 'swid' is what it's called in older representations.
                # That code can be removed once we replace all representations.
                work_identifier = work_tag.get('wi') or work_tag.get('swid')
                cls.log.debug("WORK ID %s (%s, %r, %s)", work_identifier,
                              title, author_names, language)
                swids.append(work_identifier)
        return swids

    ROLES = re.compile(r"\[([^]]+)\]$")
    LIFESPAN = re.compile(r"([0-9]+)-([0-9]*)[.;]?$")

    @classmethod
    def extract_authors(cls, _db, authors_tag, primary_author=None):
        results = []
        if authors_tag is not None:
            for author_tag in cls._xpath(authors_tag, "//oclc:author"):
                lc = author_tag.get('lc', None)
                viaf = author_tag.get('viaf', None)
                contributor, roles, default_role_used = cls._parse_single_author(
                    _db,
                    author_tag.text,
                    lc=lc,
                    viaf=viaf,
                    primary_author=primary_author)
                if contributor:
                    results.append(contributor)

        return results

    @classmethod
    def _contributor_match(cls, contributor, name, lc, viaf):
        return (contributor.sort_name == name
                and (lc is None or contributor.lc == lc)
                and (viaf is None or contributor.viaf == viaf))

    @classmethod
    def _parse_single_author(cls,
                             _db,
                             author,
                             lc=None,
                             viaf=None,
                             existing_authors=[],
                             default_role=Contributor.AUTHOR_ROLE,
                             primary_author=None):
        default_role_used = False
        # First find roles if present
        # "Giles, Lionel, 1875-1958 [Writer of added commentary; Translator]"
        author = author.strip()
        m = cls.ROLES.search(author)
        if m:
            author = author[:m.start()].strip()
            role_string = m.groups()[0]
            roles = [x.strip() for x in role_string.split(";")]
        elif default_role:
            roles = [default_role]
            default_role_used = True
        else:
            roles = []

        # Author string now looks like
        # "Giles, Lionel, 1875-1958"
        m = cls.LIFESPAN.search(author)
        kwargs = dict()
        if m:
            author = author[:m.start()].strip()
            birth, death = m.groups()
            if birth:
                kwargs[Contributor.BIRTH_DATE] = birth
            if death:
                kwargs[Contributor.DEATH_DATE] = death

        # Author string now looks like
        # "Giles, Lionel,"
        if author.endswith(","):
            author = author[:-1]

        contributor = None
        if not author:
            # No name was given for the author.
            return None, roles, default_role_used

        if primary_author and author == primary_author.sort_name:
            if Contributor.AUTHOR_ROLE in roles:
                roles.remove(Contributor.AUTHOR_ROLE)
            if Contributor.UNKNOWN_ROLE in roles:
                roles.remove(Contributor.UNKNOWN_ROLE)
            roles.insert(0, Contributor.PRIMARY_AUTHOR_ROLE)

        if existing_authors:
            # Calling Contributor.lookup will result in a database
            # hit, and looking up a contributor based on name may
            # result in multiple results (see below). We'll have no
            # way of distinguishing between those results. If
            # possible, it's much more reliable to look through
            # existing_authors (the authors derived from an entry's
            # <authors> tag).
            for x in existing_authors:
                if cls._contributor_match(x, author, lc, viaf):
                    contributor = x
                    break
            if contributor:
                was_new = False

        if not contributor:
            contributor, was_new = Contributor.lookup(_db,
                                                      author,
                                                      viaf,
                                                      lc,
                                                      extra=kwargs)
        if isinstance(contributor, list):
            # We asked for an author based solely on the name, which makes
            # Contributor.lookup() return a list.
            if len(contributor) == 1:
                # Fortunately, either the database knows about only
                # one author with that name, or it didn't know about
                # any authors with that name and it just created one,
                # so we can unambiguously use it.
                contributor = contributor[0]
            else:
                # Uh-oh. The database knows about multiple authors
                # with that name.  We have no basis for deciding which
                # author we mean. But we would prefer to identify with
                # an author who has a known LC or VIAF number.
                #
                # This should happen very rarely because of our check
                # against existing_authors above. But it will happen
                # for authors that have a work in Project Gutenberg.
                with_id = [
                    x for x in contributor
                    if x.lc is not None or x.viaf is not None
                ]
                if with_id:
                    contributor = with_id[0]
                else:
                    contributor = contributor[0]
        return contributor, roles, default_role_used
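
A worked example of the two regular expressions above, runnable on its
own; the sample author string is the one from the comments in
_parse_single_author.

import re

ROLES = re.compile(r"\[([^]]+)\]$")
LIFESPAN = re.compile(r"([0-9]+)-([0-9]*)[.;]?$")

author = "Giles, Lionel, 1875-1958 [Writer of added commentary; Translator]"
m = ROLES.search(author)
roles = [x.strip() for x in m.group(1).split(";")]
# roles == ['Writer of added commentary', 'Translator']
author = author[:m.start()].strip()
m = LIFESPAN.search(author)
birth, death = m.groups()  # ('1875', '1958')
author = author[:m.start()].strip().rstrip(",")
# author == 'Giles, Lionel'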

    @classmethod
    def primary_author_from_author_string(cls, _db, author_string):
        # If the first author mentioned in the author string
        # does not have an explicit role set, treat them as the primary
        # author.
        if not author_string:
            return None
        authors = author_string.split("|")
        if not authors:
            return None
        author, roles, default_role_used = cls._parse_single_author(
            _db, authors[0], default_role=Contributor.PRIMARY_AUTHOR_ROLE)
        if roles == [Contributor.PRIMARY_AUTHOR_ROLE]:
            return author
        return None

    @classmethod
    def parse_author_string(cls,
                            _db,
                            author_string,
                            existing_authors=None,
                            primary_author=None):
        default_role = Contributor.PRIMARY_AUTHOR_ROLE
        authors = []
        if not author_string:
            return authors
        for author in author_string.split("|"):
            author, roles, default_role_used = cls._parse_single_author(
                _db,
                author,
                existing_authors=existing_authors,
                default_role=default_role,
                primary_author=primary_author)
            if roles:
                if Contributor.PRIMARY_AUTHOR_ROLE in roles:
                    # That was the primary author.  If we see someone
                    # with no explicit role after this point, they're
                    # just a regular author.
                    default_role = Contributor.AUTHOR_ROLE
                elif not default_role_used:
                    # We're dealing with someone whose role was
                    # explicitly specified. If we see someone with no
                    # explicit role after this point, it's probably
                    # because their role is so minor as to not be
                    # worth mentioning, not because it's so major that
                    # we can assume they're an author.
                    default_role = Contributor.UNKNOWN_ROLE
            roles = roles or [default_role]
            if author:
                authors.append((author, roles))
        return authors
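
The default-role cascade in the loop above is easier to see in a
database-free sketch. This simplification uses plain strings in place of
the Contributor role constants and skips lifespans and lookups entirely.

import re

ROLE_RE = re.compile(r"\[([^]]+)\]$")

def sketch_parse(author_string):
    default_role = "Primary Author"
    parsed = []
    for author in author_string.split("|"):
        author = author.strip()
        m = ROLE_RE.search(author)
        if m:
            roles = [x.strip() for x in m.group(1).split(";")]
            author = author[:m.start()].strip()
            default_role = "Unknown"     # an explicit role was given
        else:
            roles = [default_role]
            if default_role == "Primary Author":
                default_role = "Author"  # the primary-author slot is used
        parsed.append((author, roles))
    return parsed

# sketch_parse("Smith, John|Doe, Jane [Illustrator]|Roe, Richard") ==
# [('Smith, John', ['Primary Author']),
#  ('Doe, Jane', ['Illustrator']),
#  ('Roe, Richard', ['Unknown'])]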

    @classmethod
    def _extract_basic_info(cls,
                            _db,
                            tag,
                            existing_authors=None,
                            **restrictions):
        """Extract information common to work tag and edition tag."""
        title = tag.get('title')
        author_string = tag.get('author')
        authors_and_roles = cls.parse_author_string(_db, author_string,
                                                    existing_authors)
        language = tag.get('language')

        if title and 'title' in restrictions:
            must_resemble_title = restrictions['title']
            threshold = restrictions.get('title_similarity', 0.25)
            similarity = MetadataSimilarity.title_similarity(
                must_resemble_title, title)
            if similarity < threshold:
                # The title of the book under consideration is not
                # similar enough to the given title.
                cls.log.debug("FAILURE TO RESEMBLE: %s vs %s (%.2f)", title,
                              must_resemble_title, similarity)
                return None

            # The semicolon is frequently used to separate multiple
            # works in an anthology. If there is no semicolon in the
            # original title, do not consider titles that contain
            # semicolons.
            if (' ; ' not in must_resemble_title and ' ; ' in title
                    and threshold > 0):
                cls.log.debug("SEMICOLON DISQUALIFICATION: %s", title)
                return None

        # Apply restrictions. If they're not met, return None.
        if 'language' in restrictions and language:
            # We know which language this record is for. Match it
            # against the languages acceptable for the Edition we're
            # matching against.
            restrict_to_languages = set(restrictions['language'])
            if language not in restrict_to_languages:
                # This record is for a book in a different language.
                cls.log.debug("WRONG LANGUAGE: %s", language)
                return None

        if 'authors' in restrictions:
            restrict_to_authors = restrictions['authors']
            if restrict_to_authors and isinstance(restrict_to_authors[0],
                                                  Contributor):
                restrict_to_authors = [
                    x.sort_name for x in restrict_to_authors
                ]
            primary_author = None

            for a, roles in authors_and_roles:
                if Contributor.PRIMARY_AUTHOR_ROLE in roles:
                    primary_author = a
                    break
            if (not primary_author or
                (primary_author not in restrict_to_authors
                 and primary_author.sort_name not in restrict_to_authors)):
                # None of the given authors showed up as the
                # primary author of this book. They may have had
                # some other role in it, or the book may be about
                # them, or incorporate their work, but this book
                # is not *by* them.
                return None

        return title, authors_and_roles, language
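
To make the title restriction concrete, here is a miniature version of
the check. difflib's SequenceMatcher stands in for
MetadataSimilarity.title_similarity, which is an assumption; the real
metric is defined elsewhere and will score differently.

from difflib import SequenceMatcher

def title_similarity(a, b):
    return SequenceMatcher(None, a.lower(), b.lower()).ratio()

must_resemble = "Moby Dick"
candidate = "Moby Dick, or, The Whale"
similar_enough = title_similarity(must_resemble, candidate) >= 0.25
# An anthology-style title is rejected outright when the original title
# contains no ' ; ' separator:
anthology = "Moby Dick ; Billy Budd"
rejected = ' ; ' not in must_resemble and ' ; ' in anthology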

    UNUSED_MEDIA = set([
        "itemtype-intmm",
        "itemtype-msscr",
        "itemtype-artchap-artcl",
        "itemtype-jrnl",
        "itemtype-map",
        "itemtype-vis",
        "itemtype-jrnl-digital",
        "itemtype-image-2d",
        "itemtype-artchap-digital",
        "itemtype-intmm-digital",
        "itemtype-archv",
        "itemtype-msscr-digital",
        "itemtype-game",
        "itemtype-web-digital",
        "itemtype-map-digital",
    ])

    @classmethod
    def extract_edition(cls, _db, work_tag, existing_authors, **restrictions):
        """Create a new Edition object with information about a
        work (identified by OCLC Work ID).
        """
        # TODO: 'pswid' is what it's called in older representations.
        # That code can be removed once we replace all representations.
        oclc_work_id = unicode(work_tag.get('owi') or work_tag.get('pswid'))

        if not oclc_work_id:
            raise ValueError("Work has no owi")

        item_type = work_tag.get("itemtype") or ""
        if (item_type.startswith('itemtype-book')
                or item_type.startswith('itemtype-compfile')):
            medium = Edition.BOOK_MEDIUM
        elif (item_type.startswith('itemtype-audiobook')
              or item_type.startswith('itemtype-music')):
            # Pretty much all Gutenberg texts, even the audio texts,
            # are based on a book, and the ones that aren't
            # (recordings of individual songs) probably aren't in OCLC
            # anyway. So we just want to get the books.
            # medium = Edition.AUDIO_MEDIUM
            medium = None
        elif item_type.startswith('itemtype-video'):
            # medium = Edition.VIDEO_MEDIUM
            medium = None
        elif item_type in cls.UNUSED_MEDIA:
            medium = None
        else:
            medium = None

        # Only create Editions for books with a recognized medium
        if medium is None:
            return None, False

        result = cls._extract_basic_info(_db, work_tag, existing_authors,
                                         **restrictions)
        if not result:
            # This record did not meet one of the restrictions.
            return None, False

        title, authors_and_roles, language = result

        # Record some extra OCLC-specific information
        editions = work_tag.get('editions')
        holdings = work_tag.get('holdings')

        # Get an identifier for this work.
        identifier, ignore = Identifier.for_foreign_id(_db,
                                                       Identifier.OCLC_WORK,
                                                       oclc_work_id)

        data_source = DataSource.lookup(_db, DataSource.OCLC)
        identifier.add_measurement(data_source, Measurement.HOLDINGS, holdings)
        identifier.add_measurement(data_source, Measurement.PUBLISHED_EDITIONS,
                                   editions)

        # Create an Edition for source + identifier.
        edition, new = get_one_or_create(_db,
                                         Edition,
                                         data_source=data_source,
                                         primary_identifier=identifier,
                                         create_method_kwargs=dict(
                                             title=title,
                                             language=language,
                                         ))

        # Get the most popular Dewey and LCC classifications for this
        # work.
        for tag_name, subject_type in (
                ("ddc", Subject.DDC), ("lcc", Subject.LCC)):
            tag = cls._xpath1(work_tag,
                              "//oclc:%s/oclc:mostPopular" % tag_name)
            if tag is not None:
                subject_id = tag.get('nsfa') or tag.get('sfa')
                weight = int(tag.get('holdings'))
                identifier.classify(data_source,
                                    subject_type,
                                    subject_id,
                                    weight=weight)

        # Find FAST subjects for the work.
        for heading in cls._xpath(work_tag, "//oclc:fast//oclc:heading"):
            subject_id = heading.get('ident')
            weight = int(heading.get('heldby'))
            value = heading.text
            identifier.classify(data_source, Subject.FAST, subject_id,
                                value, weight)

        # Associate the authors with the Edition.
        for contributor, roles in authors_and_roles:
            edition.add_contributor(contributor, roles)
        return edition, new
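
The classification XML this method expects can be inferred from its
XPath expressions and attribute names. A small sketch; the namespace URI
and the sample values are assumptions.

from lxml import etree

NS = {"oclc": "http://classify.oclc.org"}  # assumed namespace mapping
xml = ('<work xmlns="http://classify.oclc.org">'
       '<ddc><mostPopular nsfa="823.8" holdings="2000"/></ddc>'
       '<lcc><mostPopular sfa="PR4611" holdings="1500"/></lcc>'
       '<fast><heading ident="fst01019920" heldby="500">Whaling</heading>'
       '</fast></work>')
tree = etree.fromstring(xml)
ddc = tree.xpath("//oclc:ddc/oclc:mostPopular", namespaces=NS)[0]
ddc_id = ddc.get("nsfa") or ddc.get("sfa")  # '823.8'
ddc_weight = int(ddc.get("holdings"))       # 2000
headings = [(h.get("ident"), int(h.get("heldby")), h.text)
            for h in tree.xpath("//oclc:fast//oclc:heading", namespaces=NS)]
# headings == [('fst01019920', 500, 'Whaling')]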

    @classmethod
    def extract_edition_record(cls, _db, edition_tag, existing_authors,
                               **restrictions):
        """Create a new Edition object with information about an
        edition of a book (identified by OCLC Number).
        """
        oclc_number = unicode(edition_tag.get('oclc'))
        try:
            int(oclc_number)
        except ValueError:
            # This record does not have a valid OCLC number.
            return None, False

        # Fill in some basic information about this new record.
        result = cls._extract_basic_info(_db, edition_tag, existing_authors,
                                         **restrictions)
        if not result:
            # This record did not meet one of the restrictions.
            return None, False

        title, authors_and_roles, language = result

        # Add a couple extra bits of OCLC-specific information.
        extra = {
            OCLC.HOLDING_COUNT: edition_tag.get('holdings'),
            OCLC.FORMAT: edition_tag.get('itemtype'),
        }

        # Get an identifier for this edition.
        identifier, ignore = Identifier.for_foreign_id(_db,
                                                       Identifier.OCLC_NUMBER,
                                                       oclc_number)

        # Create an Edition for source + identifier. Note that
        # subjects is passed into the new record, so it has to be
        # defined first.
        subjects = {}
        data_source = DataSource.lookup(_db, DataSource.OCLC)
        edition_record, new = get_one_or_create(_db,
                                                Edition,
                                                data_source=data_source,
                                                primary_identifier=identifier,
                                                create_method_kwargs=dict(
                                                    title=title,
                                                    language=language,
                                                    subjects=subjects,
                                                    extra=extra,
                                                ))

        for subject_type, oclc_code in (
                (Subject.LCC, "050"), (Subject.DDC, "082")):
            classification = cls._xpath1(
                edition_tag,
                "oclc:classifications/oclc:class[@tag=%s]" % oclc_code)
            if classification is not None:
                value = classification.get("nsfa") or classification.get('sfa')
                identifier.classify(data_source, subject_type, value)

        # Associate each contributor with the new record.
        for author, roles in authors_and_roles:
            edition_record.add_contributor(author, roles)
        return edition_record, new
def patron_exists(api, patron):  # assumed name; the original def line is lost
    url = "%s/libraries/%s/patrons/%s" % (api.base_url, api.library_id, patron)
    response = api.request(url)
    return response.status_code == 200


# See if we have multiple RBDigital collections.
rb_digital_collections = _db.query(Collection.id)\
    .select_from(ExternalIntegration)\
    .join(ExternalIntegration.collections)\
    .filter(ExternalIntegration.protocol == ExternalIntegration.RB_DIGITAL)\
    .all()

# If there's only one, we don't have to do any validation; just update the
# credentials table.
if len(rb_digital_collections) == 1:
    rb_collection_id = rb_digital_collections[0][0]
    source = DataSource.lookup(_db, DataSource.RB_DIGITAL)
    update_statement = update(Credential)\
        .where(and_(
            Credential.data_source_id == source.id,
            Credential.type == Credential.IDENTIFIER_FROM_REMOTE_SERVICE
        ))\
        .values(collection_id=rb_collection_id)
    _db.execute(update_statement)

# We have multiple RBDigital integrations and we don't know which credential
# belongs to each one, so we have to check each credential against the
# RBDigital API.
else:
    rb_api = []
    for collection_id in rb_digital_collections:
        collection = Collection.by_id(_db, collection_id[0])
        rb_api.append(RBDigitalAPI(_db, collection))
Exemplo n.º 55
0
    def setup(self):
        super(TestURNLookupController, self).setup()
        self.controller = URNLookupController(self._db)
        self.source = DataSource.lookup(self._db,
                                        DataSource.INTERNAL_PROCESSING)
Exemplo n.º 56
0
    def test_fulfill(self):
        patron = self._patron()

        data_source = DataSource.lookup(self._db,
                                        "Biblioboard",
                                        autocreate=True)
        edition, pool = self._edition(
            identifier_type=Identifier.URI,
            data_source_name=data_source.name,
            with_license_pool=True,
            collection=self.collection,
        )
        # This pool doesn't have an acquisition link, so
        # we can't fulfill it yet.
        assert_raises(CannotFulfill, self.api.fulfill, patron, "1234", pool,
                      Representation.EPUB_MEDIA_TYPE)

        # Set up an epub acquisition link for the pool.
        url = self._url
        link, ignore = pool.identifier.add_link(
            Hyperlink.GENERIC_OPDS_ACQUISITION,
            url,
            data_source,
            Representation.EPUB_MEDIA_TYPE,
        )
        pool.set_delivery_mechanism(
            Representation.EPUB_MEDIA_TYPE,
            DeliveryMechanism.NO_DRM,
            RightsStatus.IN_COPYRIGHT,
            link.resource,
        )

        # Set the API's auth url so it doesn't have to get it -
        # that's tested in test_get_token.
        self.api.auth_url = "http://auth"

        token_response = json.dumps({
            "access_token": "token",
            "expires_in": 60
        })
        self.api.queue_response(200, content=token_response)

        fulfillment_time = datetime.datetime.utcnow()
        fulfillment_info = self.api.fulfill(patron, "1234", pool,
                                            Representation.EPUB_MEDIA_TYPE)
        eq_(self.collection.id, fulfillment_info.collection_id)
        eq_(data_source.name, fulfillment_info.data_source_name)
        eq_(Identifier.URI, fulfillment_info.identifier_type)
        eq_(pool.identifier.identifier, fulfillment_info.identifier)
        eq_(None, fulfillment_info.content_link)

        eq_(DeliveryMechanism.BEARER_TOKEN, fulfillment_info.content_type)
        bearer_token_document = json.loads(fulfillment_info.content)
        expires_in = bearer_token_document['expires_in']
        assert expires_in < 60
        eq_("Bearer", bearer_token_document['token_type'])
        eq_("token", bearer_token_document['access_token'])
        eq_(url, bearer_token_document['location'])

        # The FulfillmentInfo's content_expires is approximately the
        # time you get if you add the number of seconds until the
        # bearer token expires to the time at which the title was
        # originally fulfilled.
        expect_expiration = fulfillment_time + datetime.timedelta(
            seconds=expires_in)
        assert abs((fulfillment_info.content_expires -
                    expect_expiration).total_seconds()) < 5
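
The expiry check at the end of this test is plain timedelta arithmetic.
A standalone sketch with names mirroring the test; the document shape
matches the fields asserted above, and the values are made up.

import datetime
import json

bearer_token_document = json.loads(
    '{"token_type": "Bearer", "access_token": "token",'
    ' "expires_in": 57, "location": "http://example.com/book.epub"}')
fulfillment_time = datetime.datetime.utcnow()
expect_expiration = fulfillment_time + datetime.timedelta(
    seconds=bearer_token_document["expires_in"])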
Exemplo n.º 57
0
    def data_source(self):
        return DataSource.lookup(self._db, DataSource.CONTENT_CAFE)
Exemplo n.º 58
0
    def test_recursively_equivalent_identifiers(self):

        # We start with a Gutenberg book.
        gutenberg = DataSource.lookup(self._db, DataSource.GUTENBERG)
        record, ignore = Edition.for_foreign_id(self._db, gutenberg,
                                                Identifier.GUTENBERG_ID, "100")
        gutenberg_id = record.primary_identifier

        # We use OCLC Classify to do a title/author lookup.
        oclc = DataSource.lookup(self._db, DataSource.OCLC)
        search_id, ignore = Identifier.for_foreign_id(self._db,
                                                      Identifier.OCLC_WORK,
                                                      "60010")
        gutenberg_id.equivalent_to(oclc, search_id, 1)

        # The title/author lookup associates the search term with two
        # different OCLC Numbers.
        oclc_id, ignore = Identifier.for_foreign_id(self._db,
                                                    Identifier.OCLC_NUMBER,
                                                    "9999")
        oclc_id_2, ignore = Identifier.for_foreign_id(self._db,
                                                      Identifier.OCLC_NUMBER,
                                                      "1000")

        search_id.equivalent_to(oclc, oclc_id, 1)
        search_id.equivalent_to(oclc, oclc_id_2, 1)

        # We then use OCLC Linked Data to connect one of the OCLC
        # Numbers with an ISBN.
        linked_data = DataSource.lookup(self._db, DataSource.OCLC_LINKED_DATA)
        isbn_id, ignore = Identifier.for_foreign_id(self._db, Identifier.ISBN,
                                                    "900100434X")
        oclc_id.equivalent_to(linked_data, isbn_id, 1)

        # As it turns out, we have an Overdrive work record...
        overdrive = DataSource.lookup(self._db, DataSource.OVERDRIVE)
        overdrive_record, ignore = Edition.for_foreign_id(
            self._db, overdrive, Identifier.OVERDRIVE_ID, "{111-222}")
        overdrive_id = overdrive_record.primary_identifier

        # ...which is tied (by Overdrive) to the same ISBN.
        overdrive_id.equivalent_to(overdrive, isbn_id, 1)

        # Finally, here's a completely unrelated Edition, which
        # will not be showing up.
        gutenberg2, ignore = Edition.for_foreign_id(self._db, gutenberg,
                                                    Identifier.GUTENBERG_ID,
                                                    "200")
        gutenberg2.title = "Unrelated Gutenberg record."

        levels = [
            record.equivalent_identifiers(policy=PresentationCalculationPolicy(
                equivalent_identifier_levels=i)) for i in range(0, 5)
        ]

        # At level 0, the only identifier found is the Gutenberg ID.
        assert set([gutenberg_id]) == set(levels[0])

        # At level 1, we pick up the title/author lookup.
        assert set([gutenberg_id, search_id]) == set(levels[1])

        # At level 2, we pick up the title/author lookup and the two
        # OCLC Numbers.
        assert set([gutenberg_id, search_id, oclc_id,
                    oclc_id_2]) == set(levels[2])

        # At level 3, we also pick up the ISBN.
        assert set([gutenberg_id, search_id, oclc_id, oclc_id_2,
                    isbn_id]) == set(levels[3])

        # At level 4, the recursion starts to go in the other
        # direction: we pick up the Overdrive ID that's equivalent to
        # the same ISBN as the OCLC Number.
        assert set([
            gutenberg_id, search_id, oclc_id, oclc_id_2, isbn_id, overdrive_id
        ]) == set(levels[4])
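
The level-by-level behavior tested here amounts to a breadth-first
expansion over an equivalence graph. A toy model of the graph built
above, with shorthand names standing in for the identifier objects:

edges = {
    "gutenberg": ["search"],
    "search": ["gutenberg", "oclc_9999", "oclc_1000"],
    "oclc_9999": ["search", "isbn"],
    "oclc_1000": ["search"],
    "isbn": ["oclc_9999", "overdrive"],
    "overdrive": ["isbn"],
}

def equivalent_identifiers(start, levels):
    seen = {start}
    frontier = {start}
    for _ in range(levels):
        frontier = {n for f in frontier for n in edges.get(f, [])} - seen
        seen |= frontier
    return seen

assert equivalent_identifiers("gutenberg", 0) == {"gutenberg"}
assert equivalent_identifiers("gutenberg", 4) == set(edges)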
Exemplo n.º 59
0
    def source(self):
        return DataSource.lookup(self._db, DataSource.NOVELIST)
Exemplo n.º 60
0
    def custom_lists(self, identifier_type, identifier):
        self.require_librarian(flask.request.library)

        library = flask.request.library
        work = self.load_work(library, identifier_type, identifier)
        if isinstance(work, ProblemDetail):
            return work

        staff_data_source = DataSource.lookup(self._db,
                                              DataSource.LIBRARY_STAFF)

        if flask.request.method == "GET":
            lists = []
            for entry in work.custom_list_entries:
                custom_list = entry.customlist
                lists.append(dict(id=custom_list.id, name=custom_list.name))
            return dict(custom_lists=lists)

        if flask.request.method == "POST":
            lists = flask.request.form.get("lists")
            if lists:
                lists = json.loads(lists)
            else:
                lists = []

            affected_lanes = set()

            # Remove entries for lists that were not in the submitted form.
            submitted_ids = [info.get("id") for info in lists if info.get("id")]
            for entry in work.custom_list_entries:
                if entry.list_id not in submitted_ids:
                    custom_list = entry.customlist
                    custom_list.remove_entry(work)
                    for lane in Lane.affected_by_customlist(custom_list):
                        affected_lanes.add(lane)

            # Add entries for any new lists.
            for list_info in lists:
                list_id = list_info.get("id")
                name = list_info.get("name")

                if list_id:
                    is_new = False
                    custom_list = get_one(self._db,
                                          CustomList,
                                          id=int(list_id),
                                          name=name,
                                          library=library,
                                          data_source=staff_data_source)
                    if not custom_list:
                        self._db.rollback()
                        return MISSING_CUSTOM_LIST.detailed(
                            _("Could not find list \"%(list_name)s\"",
                              list_name=name))
                else:
                    custom_list, is_new = create(self._db,
                                                 CustomList,
                                                 name=name,
                                                 data_source=staff_data_source,
                                                 library=library)
                    custom_list.created = datetime.now()
                entry, was_new = custom_list.add_entry(work, featured=True)
                if was_new:
                    for lane in Lane.affected_by_customlist(custom_list):
                        affected_lanes.add(lane)

            # If any list changes affected lanes, update their sizes.
            # NOTE: This may not make a difference until the
            # works are actually re-indexed.
            for lane in affected_lanes:
                lane.update_size(self._db, self.search_engine)

            return Response(unicode(_("Success")), 200)
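
For reference, a sketch of what a client submits to the POST branch
above, per the parsing at the top of that branch: a "lists" form field
containing JSON. The id and name values here are made up.

import json

form_data = {
    "lists": json.dumps([
        {"id": 42, "name": "Staff Picks"},  # keep/update an existing list
        {"name": "Summer Reading"},         # no id: a new list is created
    ])
}
# Any list the work currently belongs to that is absent from this
# payload has the work's entry removed.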