Example #1
    def test_initialization(self):
        with pytest.raises(ValueError) as excinfo:
            ContributorLane(self._default_library, None)
        assert "ContributorLane can't be created without contributor" in str(
            excinfo.value
        )

        parent = WorkList()
        parent.initialize(self._default_library)

        lane = ContributorLane(
            self._default_library,
            self.contributor,
            parent,
            languages=["a"],
            audiences=["b"],
        )
        assert self.contributor == lane.contributor
        assert ["a"] == lane.languages
        assert ["b"] == lane.audiences
        assert [lane] == parent.children

        # The contributor_key will be used in links to other pages
        # of this Lane and so on.
        assert "Lois Lane" == lane.contributor_key

        # If the contributor used to create a ContributorLane has no
        # display name, their sort name is used as the
        # contributor_key.
        contributor = ContributorData(sort_name="Lane, Lois")
        lane = ContributorLane(self._default_library, contributor)
        assert contributor == lane.contributor
        assert "Lane, Lois" == lane.contributor_key
Example #2
    def test_lifecycle_with_worklist(self):
        facets = Facets.default(self._default_library)
        pagination = Pagination.default()
        lane = WorkList()
        lane.initialize(self._default_library)

        # Fetch a cached feed from the database. It comes out updated.
        refresher = MockFeedGenerator()
        args = (self._db, lane, facets, pagination, refresher)
        feed = CachedFeed.fetch(*args, max_age=0, raw=True)
        assert "This is feed #1" == feed.content

        assert pagination.query_string == feed.pagination
        assert facets.query_string == feed.facets
        assert None == feed.lane_id
        assert lane.unique_key == feed.unique_key

        # Fetch it again, with a high max_age, and it's cached!
        feed = CachedFeed.fetch(*args, max_age=1000, raw=True)
        assert "This is feed #1" == feed.content

        # Fetch it with a low max_age, and it gets updated again.
        feed = CachedFeed.fetch(*args, max_age=0, raw=True)
        assert "This is feed #2" == feed.content

        # The special constant CACHE_FOREVER means it's always cached.
        feed = CachedFeed.fetch(*args, max_age=CachedFeed.CACHE_FOREVER, raw=True)
        assert "This is feed #2" == feed.content
Example #3
    def test_initialization(self):
        assert_raises_regexp(
            ValueError, "ContributorLane can't be created without contributor",
            ContributorLane, self._default_library, None)

        parent = WorkList()
        parent.initialize(self._default_library)

        lane = ContributorLane(
            self._default_library,
            self.contributor,
            parent,
            languages=['a'],
            audiences=['b'],
        )
        eq_(self.contributor, lane.contributor)
        eq_(['a'], lane.languages)
        eq_(['b'], lane.audiences)
        eq_([lane], parent.children)

        # The contributor_key will be used in links to other pages
        # of this Lane and so on.
        eq_("Lois Lane", lane.contributor_key)

        # If the contributor used to create a ContributorLane has no
        # display name, their sort name is used as the
        # contributor_key.
        contributor = ContributorData(sort_name="Lane, Lois")
        lane = ContributorLane(self._default_library, contributor)
        eq_(contributor, lane.contributor)
        eq_("Lane, Lois", lane.contributor_key)
Example #4
    def make_child():
        # Set up a WorkList with settings that contradict the
        # settings of the work we'll be using as the basis for our
        # WorkBasedLane.
        child = WorkList()
        child.initialize(
            self._default_library,
            'sublane',
            languages=['eng'],
            audiences=[Classifier.AUDIENCE_ADULT],
        )
        return child
Example #5
    def test_load_facets_from_request(self):
        # The library has two EntryPoints enabled.
        self._default_library.setting(
            EntryPoint.ENABLED_SETTING).value = json.dumps([
                EbooksEntryPoint.INTERNAL_NAME,
                AudiobooksEntryPoint.INTERNAL_NAME
            ])

        with self.app.test_request_context("/?order=%s" % Facets.ORDER_TITLE):
            flask.request.library = self._default_library
            facets = load_facets_from_request()
            assert Facets.ORDER_TITLE == facets.order
            # Enabled facets are passed in to the newly created Facets,
            # in case the load method received a custom config.
            assert facets.facets_enabled_at_init != None

        with self.app.test_request_context("/?order=bad_facet"):
            flask.request.library = self._default_library
            problemdetail = load_facets_from_request()
            assert INVALID_INPUT.uri == problemdetail.uri

        # An EntryPoint will be picked up from the request and passed
        # into the Facets object, assuming the EntryPoint is
        # configured on the present library.
        worklist = WorkList()
        worklist.initialize(self._default_library)
        with self.app.test_request_context("/?entrypoint=Audio"):
            flask.request.library = self._default_library
            facets = load_facets_from_request(worklist=worklist)
            assert AudiobooksEntryPoint == facets.entrypoint
            assert False == facets.entrypoint_is_default

        # If the requested EntryPoint is not configured, the default
        # EntryPoint is used.
        with self.app.test_request_context("/?entrypoint=NoSuchEntryPoint"):
            flask.request.library = self._default_library
            default_entrypoint = object()
            facets = load_facets_from_request(
                worklist=worklist, default_entrypoint=default_entrypoint)
            assert default_entrypoint == facets.entrypoint
            assert True == facets.entrypoint_is_default

        # Load a SearchFacets object that pulls information from an
        # HTTP header.
        with self.app.test_request_context("/",
                                           headers={"Accept-Language": "ja"}):
            flask.request.library = self._default_library
            facets = load_facets_from_request(base_class=SearchFacets)
            assert ["jpn"] == facets.languages
Example #6
def load_lanes(_db, library):
    """Return a WorkList that reflects the current lane structure of the
    Library.

    If no top-level visible lanes are configured, the WorkList will be
    configured to show every book in the collection.

    If a single top-level Lane is configured, it will be returned as the
    WorkList.

    Otherwise, a WorkList containing the visible top-level lanes is
    returned.
    """
    top_level = WorkList.top_level_for_library(_db, library)

    # It's likely this WorkList will be used across sessions, so
    # expunge any data model objects from the database session.
    #
    # TODO: This is the cause of a lot of problems in the cached OPDS
    # feed generator. There, these Lanes are used in a normal database
    # session and we end up needing hacks to merge them back into the
    # session.
    if isinstance(top_level, Lane):
        to_expunge = [top_level]
    else:
        to_expunge = [x for x in top_level.children if isinstance(x, Lane)]
    # In Python 3 map() is lazy, so loop explicitly to make sure each
    # lane is actually expunged.
    for lane in to_expunge:
        _db.expunge(lane)
    return top_level
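Why an explicit loop rather than `map(_db.expunge, to_expunge)`? Under Python 3, `map` returns a lazy iterator, so the expunge calls would never actually run. A tiny standalone demonstration:

# map() is lazy in Python 3: the side effect never happens unless the
# iterator is consumed.
calls = []
map(calls.append, [1, 2, 3])        # discarded iterator: no effect
assert calls == []
list(map(calls.append, [1, 2, 3]))  # consuming it runs the function
assert calls == [1, 2, 3]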
Example #7
    def test_should_process_lane(self):
        parent = self._lane()
        parent.size = 100
        child = self._lane(parent=parent)
        child.size = 10
        grandchild = self._lane(parent=child)
        grandchild.size = 1
        wl = WorkList()
        empty = self._lane(fiction=False)
        empty.size = 0

        script = CacheMARCFiles(self._db, cmd_args=[])
        script.max_depth = 1
        eq_(True, script.should_process_lane(parent))
        eq_(True, script.should_process_lane(child))
        eq_(False, script.should_process_lane(grandchild))
        eq_(True, script.should_process_lane(wl))
        eq_(False, script.should_process_lane(empty))

        script.max_depth = 0
        eq_(True, script.should_process_lane(parent))
        eq_(False, script.should_process_lane(child))
        eq_(False, script.should_process_lane(grandchild))
        eq_(True, script.should_process_lane(wl))
        eq_(False, script.should_process_lane(empty))
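The expectations above suggest a rule along these lines (a hedged reconstruction; `lane.depth` is an assumption about the Lane model, and this is not the real `CacheMARCFiles.should_process_lane`):

def should_process_lane(lane, max_depth):
    if isinstance(lane, Lane):
        if lane.size == 0:
            return False       # never process an empty lane
        if lane.depth > max_depth:
            return False       # too far below the top level
    return True                # plain WorkLists are always processed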
Example #8
def load_lanes(_db, library):
    """Return a WorkList that reflects the current lane structure of the
    Library.

    If no top-level visible lanes are configured, the WorkList will be
    configured to show every book in the collection.

    If a single top-level Lane is configured, it will be returned as the
    WorkList.

    Otherwise, a WorkList containing the visible top-level lanes is
    returned.
    """
    top_level = WorkList.top_level_for_library(_db, library)

    # It's likely this WorkList will be used across sessions, so
    # expunge any data model objects from the database session.
    if isinstance(top_level, Lane):
        to_expunge = [top_level]
    else:
        to_expunge = [x for x in top_level.children if isinstance(x, Lane)]

    # In Python 3 map() is lazy, so loop explicitly to make sure each
    # lane is actually expunged.
    for lane in to_expunge:
        _db.expunge(lane)
    return top_level
Example #9
    def test_do_generate_handles_all_entrypoints(self):
        self.called_with = []

        @classmethod
        def mock_groups(cls, *args, **kwargs):
            self.called_with.append((args, kwargs))

        old_groups = AcquisitionFeed.groups
        AcquisitionFeed.groups = mock_groups

        # Here's a normal WorkList with no EntryPoints.
        worklist = WorkList()
        library = self._default_library
        worklist.initialize(library)
        script = CacheOPDSGroupFeedPerLane(self._db, cmd_args=[])
        with script.app.test_request_context("/"):
            list(script.do_generate(worklist))

        # AcquisitionFeed.groups was called once, with a FeaturedFacets
        # object that did not include an EntryPoint.
        args, kwargs = self.called_with.pop()
        facets = kwargs['facets']
        assert isinstance(facets, FeaturedFacets)
        eq_(library.minimum_featured_quality, facets.minimum_featured_quality)
        eq_(worklist.uses_customlists, facets.uses_customlists)
        eq_(None, facets.entrypoint)

        # Now give the WorkList some EntryPoints.
        worklist.initialize(
            library, entrypoints=[AudiobooksEntryPoint, EbooksEntryPoint])
        with script.app.test_request_context("/"):
            list(script.do_generate(worklist))

        # AcquisitionFeed.groups was called once for each
        # EntryPoint available to the WorkList.
        eq_([AudiobooksEntryPoint, EbooksEntryPoint], [
            kwargs['facets'].entrypoint for (args, kwargs) in self.called_with
        ])

        AcquisitionFeed.groups = old_groups
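The two runs imply that do_generate builds one FeaturedFacets per EntryPoint available to the WorkList, or a single facets object with no EntryPoint when none are configured. A hedged sketch of that loop (the FeaturedFacets keyword arguments are inferred from the assertions, not taken from the real CacheOPDSGroupFeedPerLane):

def facets_for_worklist(worklist, library):
    # One facets object per EntryPoint; a single entrypoint-less one
    # when the WorkList has no EntryPoints configured.
    for entrypoint in (worklist.entrypoints or [None]):
        yield FeaturedFacets(
            minimum_featured_quality=library.minimum_featured_quality,
            uses_customlists=worklist.uses_customlists,
            entrypoint=entrypoint,
        )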
Example #10
    def test__prepare_keys(self):
        # Verify the method that turns WorkList, Facets, and Pagination
        # into a unique set of values for CachedFeed fields.

        # First, prepare some mock classes.
        class MockCachedFeed(CachedFeed):
            feed_type_called_with = None

            @classmethod
            def feed_type(cls, worklist, facets):
                cls.feed_type_called_with = (worklist, facets)
                return "mock type"

        class MockFacets(object):
            query_string = b"facets query string"

        class MockPagination(object):
            query_string = b"pagination query string"

        m = MockCachedFeed._prepare_keys
        # A WorkList of some kind is required.
        with pytest.raises(ValueError) as excinfo:
            m(self._db, None, MockFacets, MockPagination)
        assert "Cannot prepare a CachedFeed without a WorkList." in str(excinfo.value)

        # Basic Lane case, no facets or pagination.
        lane = self._lane()

        # The response object is a named tuple. feed_type, library and
        # lane_id are the only members set.
        keys = m(self._db, lane, None, None)
        assert "mock type" == keys.feed_type
        assert lane.library == keys.library
        assert None == keys.work
        assert lane.id == keys.lane_id
        assert None == keys.unique_key
        assert "" == keys.facets_key
        assert "" == keys.pagination_key

        # When pagination and/or facets are available, facets_key and
        # pagination_key are set appropriately.
        keys = m(self._db, lane, MockFacets, MockPagination)
        assert "facets query string" == keys.facets_key
        assert "pagination query string" == keys.pagination_key

        # Now we can check that feed_type was obtained by passing
        # `worklist` and `facets` into MockCachedFeed.feed_type.
        assert "mock type" == keys.feed_type
        assert (lane, MockFacets) == MockCachedFeed.feed_type_called_with

        # When a WorkList is used instead of a Lane, keys.lane_id is None
        # but keys.unique_key is set to worklist.unique_key.
        worklist = WorkList()
        worklist.initialize(
            library=self._default_library,
            display_name="wl",
            languages=["eng", "spa"],
            audiences=[Classifier.AUDIENCE_CHILDREN],
        )

        keys = m(self._db, worklist, None, None)
        assert "mock type" == keys.feed_type
        assert worklist.get_library(self._db) == keys.library
        assert None == keys.work
        assert None == keys.lane_id
        assert "wl-eng,spa-Children" == keys.unique_key
        assert keys.unique_key == worklist.unique_key
        assert "" == keys.facets_key
        assert "" == keys.pagination_key

        # When a WorkList is associated with a specific .work,
        # that information is included as keys.work.
        work = object()
        worklist.work = work
        keys = m(self._db, worklist, None, None)
        assert work == keys.work
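The fields asserted on suggest the named tuple returned by _prepare_keys looks roughly like this (field names inferred from the test; the real definition lives in CachedFeed):

from collections import namedtuple

CachedFeedKeys = namedtuple(
    "CachedFeedKeys",
    ["feed_type", "library", "work", "lane_id", "unique_key",
     "facets_key", "pagination_key"],
)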
Example #11
    def test_response_format(self):
        # Verify that fetch() can be told to return an appropriate
        # OPDSFeedResponse object. This is the default behavior, since
        # it preserves some useful information that would otherwise be
        # lost.
        facets = Facets.default(self._default_library)
        pagination = Pagination.default()
        wl = WorkList()
        wl.initialize(self._default_library)

        def refresh():
            return "Here's a feed."

        private = object()
        r = CachedFeed.fetch(
            self._db, wl, facets, pagination, refresh, max_age=102, private=private
        )
        assert isinstance(r, OPDSFeedResponse)
        assert 200 == r.status_code
        assert OPDSFeed.ACQUISITION_FEED_TYPE == r.content_type
        assert 102 == r.max_age
        assert "Here's a feed." == str(r)

        # The extra argument `private`, not used by CachedFeed.fetch, was
        # passed on to the OPDSFeedResponse constructor.
        assert private == r.private

        # The CachedFeed was created; just not returned.
        cf = self._db.query(CachedFeed).one()
        assert "Here's a feed." == cf.content

        # Try it again as a cache hit.
        r = CachedFeed.fetch(
            self._db, wl, facets, pagination, refresh, max_age=102, private=private
        )
        assert isinstance(r, OPDSFeedResponse)
        assert 200 == r.status_code
        assert OPDSFeed.ACQUISITION_FEED_TYPE == r.content_type
        assert 102 == r.max_age
        assert "Here's a feed." == str(r)

        # If we tell CachedFeed to cache its feed 'forever', that only
        # applies to the _database_ cache. The client is told to cache
        # the feed for the default period.
        r = CachedFeed.fetch(
            self._db,
            wl,
            facets,
            pagination,
            refresh,
            max_age=CachedFeed.CACHE_FOREVER,
            private=private,
        )
        assert isinstance(r, OPDSFeedResponse)
        assert OPDSFeed.DEFAULT_MAX_AGE == r.max_age

        # If the Library associated with the WorkList used in the feed
        # has root lanes, `private` is always set to True, even if we
        # asked for the opposite.

        from core.model import Library

        Library._has_root_lane_cache[self._default_library.id] = True
        r = CachedFeed.fetch(self._db, wl, facets, pagination, refresh, private=False)
        assert isinstance(r, OPDSFeedResponse)
        assert True == r.private
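The CACHE_FOREVER behavior verified above amounts to a small translation between the database cache lifetime and the HTTP max-age (a sketch of the rule, not the actual fetch() internals):

def client_max_age(max_age):
    # CACHE_FOREVER applies only to the database row; the HTTP
    # response falls back to the standard client-side cache period.
    if max_age is CachedFeed.CACHE_FOREVER:
        return OPDSFeed.DEFAULT_MAX_AGE
    return max_age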
Example #12
    def test_no_race_conditions(self):
        # Why do we look up a CachedFeed again after feed generation?
        # Well, let's see what happens if someone else messes around
        # with the CachedFeed object _while the refresher is running_.
        #
        # This is a race condition that happens in real life. Rather
        # than setting up a multi-threaded test, we can have the
        # refresher itself simulate a background modification by
        # messing around with the CachedFeed object we know will
        # eventually be returned.
        #
        # The most up-to-date feed always wins, so background
        # modifications will take effect only if they made the
        # CachedFeed look _newer_ than the foreground process does.
        facets = Facets.default(self._default_library)
        pagination = Pagination.default()
        wl = WorkList()
        wl.initialize(self._default_library)

        m = CachedFeed.fetch

        # In this case, two simulated threads try to create the same
        # CachedFeed at the same time. We end up with a single
        # CachedFeed containing the result of the last code that ran.
        def simultaneous_refresher():
            # This refresher method simulates another thread creating
            # a CachedFeed for this feed while this thread's
            # refresher is running.
            def other_thread_refresher():
                return "Another thread made a feed."

            m(self._db, wl, facets, pagination, other_thread_refresher, 0, raw=True)

            return "Then this thread made a feed."

        # This will call simultaneous_refresher(), which will call
        # CachedFeed.fetch() _again_, which will call
        # other_thread_refresher().
        result = m(
            self._db, wl, facets, pagination, simultaneous_refresher, 0, raw=True
        )

        # We ended up with a single CachedFeed containing the
        # latest information.
        assert [result] == self._db.query(CachedFeed).all()
        assert "Then this thread made a feed." == result.content

        # If two threads contend for an existing CachedFeed, the one that
        # sets CachedFeed.timestamp to the later value wins.
        #
        # Here, the other thread wins by setting .timestamp on the
        # existing CachedFeed to a date in the future.
        now = utc_now()
        tomorrow = now + datetime.timedelta(days=1)
        yesterday = now - datetime.timedelta(days=1)

        def tomorrow_vs_now():
            result.content = "Someone in the background set tomorrow's content."
            result.timestamp = tomorrow
            return "Today's content can't compete."

        tomorrow_result = m(
            self._db, wl, facets, pagination, tomorrow_vs_now, 0, raw=True
        )
        assert tomorrow_result == result
        assert (
            "Someone in the background set tomorrow's content."
            == tomorrow_result.content
        )
        assert tomorrow_result.timestamp == tomorrow

        # Here, the other thread sets .timestamp to a date in the past, and
        # it loses out to the (apparently) newer feed.
        def yesterday_vs_now():
            result.content = "Someone in the background set yesterday's content."
            result.timestamp = yesterday
            return "Today's content is fresher."

        now_result = m(self._db, wl, facets, pagination, yesterday_vs_now, 0, raw=True)

        # We got the same CachedFeed we've been getting this whole
        # time, but the outdated data set by the 'background thread'
        # has been fixed.
        assert result == now_result
        assert "Today's content is fresher." == result.content
        assert result.timestamp > yesterday

        # This shouldn't happen, but if the CachedFeed's timestamp or
        # content are *cleared out* in the background, between the
        # time the CachedFeed is fetched and the time the refresher
        # finishes, then we don't know what's going on and we don't
        # take chances. We create a whole new CachedFeed object for
        # the updated version of the feed.

        # First, try the situation where .timestamp is cleared out in
        # the background.
        def timestamp_cleared_in_background():
            result.content = "Someone else sets content and clears timestamp."
            result.timestamp = None

            return "Non-weird content."

        result2 = m(
            self._db,
            wl,
            facets,
            pagination,
            timestamp_cleared_in_background,
            0,
            raw=True,
        )
        now = utc_now()

        # result2 is a brand new CachedFeed.
        assert result2 != result
        assert "Non-weird content." == result2.content
        assert (now - result2.timestamp).total_seconds() < 2

        # We let the background process do whatever it wants to do
        # with the old one.
        assert "Someone else sets content and clears timestamp." == result.content
        assert None == result.timestamp

        # Next, test the situation where .content is cleared out.
        def content_cleared_in_background():
            result2.content = None
            result2.timestamp = tomorrow

            return "Non-weird content."

        result3 = m(
            self._db, wl, facets, pagination, content_cleared_in_background, 0, raw=True
        )
        now = utc_now()

        # Again, a brand new CachedFeed.
        assert result3 != result2
        assert result3 != result
        assert "Non-weird content." == result3.content
        assert (now - result3.timestamp).total_seconds() < 2

        # Again, we let the background process have the old one for
        # whatever weird thing it wants to do.
        assert None == result2.content
        assert tomorrow == result2.timestamp
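In short, the refresh path resolves contention with a "newest timestamp wins, bail out on cleared fields" rule. A hedged sketch of that decision (illustrative only, returning descriptions rather than performing the database work):

def resolve_refresh(feed, fresh_timestamp):
    # If another thread cleared timestamp or content, don't take
    # chances: start over with a brand-new CachedFeed row.
    if feed.timestamp is None or feed.content is None:
        return "create a new CachedFeed"
    # Otherwise the most up-to-date feed wins.
    if feed.timestamp > fresh_timestamp:
        return "keep the background thread's newer feed"
    return "overwrite with this thread's feed"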
Example #13
    def test_records(self):
        integration = self._integration()
        now = utc_now()
        exporter = MARCExporter.from_config(self._default_library)
        annotator = Annotator()
        lane = self._lane("Test Lane", genres=["Mystery"])
        w1 = self._work(genre="Mystery", with_open_access_download=True)
        w2 = self._work(genre="Mystery", with_open_access_download=True)

        search_engine = MockExternalSearchIndex()
        search_engine.bulk_update([w1, w2])

        # If there's a storage protocol but not corresponding storage integration,
        # it raises an exception.
        pytest.raises(Exception, exporter.records, lane, annotator)

        # If there is a storage integration, the output file is mirrored.
        mirror_integration = self._external_integration(
            ExternalIntegration.S3,
            ExternalIntegration.STORAGE_GOAL,
            username="******",
            password="******",
        )

        mirror = MockS3Uploader()

        exporter.records(
            lane,
            annotator,
            mirror_integration,
            mirror=mirror,
            query_batch_size=1,
            upload_batch_size=1,
            search_engine=search_engine,
        )

        # The file was mirrored and a CachedMARCFile was created to track the mirrored file.
        assert 1 == len(mirror.uploaded)
        [cache] = self._db.query(CachedMARCFile).all()
        assert self._default_library == cache.library
        assert lane == cache.lane
        assert mirror.uploaded[0] == cache.representation
        assert None == cache.representation.content
        assert ("https://test-marc-bucket.s3.amazonaws.com/%s/%s/%s.mrc" % (
            self._default_library.short_name,
            quote(str(cache.representation.fetched_at)),
            quote(lane.display_name),
        ) == mirror.uploaded[0].mirror_url)
        assert None == cache.start_time
        assert cache.end_time > now

        # The content was uploaded in two parts.
        assert 2 == len(mirror.content[0])
        complete_file = b"".join(mirror.content[0])
        records = list(MARCReader(complete_file))
        assert 2 == len(records)

        title_fields = [record.get_fields("245") for record in records]
        titles = [fields[0].get_subfields("a")[0] for fields in title_fields]
        assert set([w1.title, w2.title]) == set(titles)

        assert w1.title in w1.marc_record
        assert w2.title in w2.marc_record

        self._db.delete(cache)

        # It also works with a WorkList instead of a Lane, in which case
        # there will be no lane in the CachedMARCFile.
        worklist = WorkList()
        worklist.initialize(self._default_library, display_name="All Books")

        mirror = MockS3Uploader()
        exporter.records(
            worklist,
            annotator,
            mirror_integration,
            mirror=mirror,
            query_batch_size=1,
            upload_batch_size=1,
            search_engine=search_engine,
        )

        assert 1 == len(mirror.uploaded)
        [cache] = self._db.query(CachedMARCFile).all()
        assert self._default_library == cache.library
        assert None == cache.lane
        assert mirror.uploaded[0] == cache.representation
        assert None == cache.representation.content
        assert ("https://test-marc-bucket.s3.amazonaws.com/%s/%s/%s.mrc" % (
            self._default_library.short_name,
            quote(str(cache.representation.fetched_at)),
            quote(worklist.display_name),
        ) == mirror.uploaded[0].mirror_url)
        assert None == cache.start_time
        assert cache.end_time > now

        assert 2 == len(mirror.content[0])
        complete_file = b"".join(mirror.content[0])
        records = list(MARCReader(complete_file))
        assert 2 == len(records)

        self._db.delete(cache)

        # If a start time is set, it's used in the mirror url.
        #
        # (Our mock search engine returns everything in its 'index',
        # so this doesn't test that the start time is actually used to
        # find works -- that's in the search index tests and the
        # tests of MARCExporterFacets.)
        start_time = now - datetime.timedelta(days=3)

        mirror = MockS3Uploader()
        exporter.records(
            lane,
            annotator,
            mirror_integration,
            start_time=start_time,
            mirror=mirror,
            query_batch_size=2,
            upload_batch_size=2,
            search_engine=search_engine,
        )
        [cache] = self._db.query(CachedMARCFile).all()

        assert self._default_library == cache.library
        assert lane == cache.lane
        assert mirror.uploaded[0] == cache.representation
        assert None == cache.representation.content
        assert ("https://test-marc-bucket.s3.amazonaws.com/%s/%s-%s/%s.mrc" % (
            self._default_library.short_name,
            quote(str(start_time)),
            quote(str(cache.representation.fetched_at)),
            quote(lane.display_name),
        ) == mirror.uploaded[0].mirror_url)
        assert start_time == cache.start_time
        assert cache.end_time > now
        self._db.delete(cache)

        # If the search engine returns no contents for the lane,
        # nothing will be mirrored, but a CachedMARCFile is still
        # created to track that we checked for updates.
        empty_search_engine = MockExternalSearchIndex()

        mirror = MockS3Uploader()
        exporter.records(
            lane,
            annotator,
            mirror_integration,
            mirror=mirror,
            search_engine=empty_search_engine,
        )

        assert [] == mirror.content[0]
        [cache] = self._db.query(CachedMARCFile).all()
        assert cache.representation == mirror.uploaded[0]
        assert self._default_library == cache.library
        assert lane == cache.lane
        assert None == cache.representation.content
        assert None == cache.start_time
        assert cache.end_time > now

        self._db.delete(cache)
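The three URL assertions above all follow one template. A hedged reconstruction of the mirror URL scheme (the bucket host comes from the test fixtures; the path layout is inferred from the assertions):

from urllib.parse import quote

def mirror_url(short_name, fetched_at, display_name, start_time=None):
    # With a start_time the middle segment becomes
    # "{start_time}-{fetched_at}"; otherwise it is just "{fetched_at}".
    when = quote(str(fetched_at))
    if start_time is not None:
        when = "%s-%s" % (quote(str(start_time)), when)
    return "https://test-marc-bucket.s3.amazonaws.com/%s/%s/%s.mrc" % (
        short_name,
        when,
        quote(display_name),
    )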