Example #1
    def test_run_on_specific_identifiers_respects_cutoff_time(self):

        last_run = datetime.datetime(2016, 1, 1)

        # Once upon a time we successfully added coverage for
        # self.identifier.
        record, ignore = CoverageRecord.add_for(self.identifier,
                                                self.output_source)
        record.timestamp = last_run

        # But now something has gone wrong, and if we ever run the
        # coverage provider again we will get a persistent failure.
        provider = NeverSuccessfulCoverageProvider("Persistent failure",
                                                   self.input_identifier_types,
                                                   self.output_source,
                                                   cutoff_time=last_run)

        # You might think this would result in a persistent failure...
        (success, transient_failure,
         persistent_failure), records = (provider.run_on_specific_identifiers(
             [self.identifier]))

        # ...but we get an automatic success. We didn't even try to
        # run the coverage provider on self.identifier because the
        # coverage record was up-to-date.
        eq_(1, success)
        eq_(0, persistent_failure)
        eq_([], records)

        # But if we move the cutoff time forward, the provider will run
        # on self.identifier and fail.
        provider.cutoff_time = datetime.datetime(2016, 2, 1)
        (success, transient_failure,
         persistent_failure), records = (provider.run_on_specific_identifiers(
             [self.identifier]))
        eq_(0, success)
        eq_(1, persistent_failure)

        # The formerly successful CoverageRecord will be updated to
        # reflect the failure.
        eq_(records[0], record)
        eq_("What did you expect?", record.exception)

    def test_should_update(self):
        cutoff = datetime.datetime(2016, 1, 1)
        provider = AlwaysSuccessfulCoverageProvider(
            "Always successful",
            self.input_identifier_types,
            self.output_source,
            cutoff_time=cutoff)

        # If coverage is missing, we should update.
        eq_(True, provider.should_update(None))

        # If coverage is outdated, we should update.
        record, ignore = CoverageRecord.add_for(self.identifier,
                                                self.output_source)
        record.timestamp = datetime.datetime(2015, 1, 1)
        eq_(True, provider.should_update(record))

        # If coverage is up-to-date, we should not update.
        record.timestamp = cutoff
        eq_(False, provider.should_update(record))
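
The two tests above pin down the cutoff semantics: coverage counts as up-to-date when the record's timestamp is at or after cutoff_time. A minimal sketch of that rule (not the library's actual implementation; it only assumes the record exposes a timestamp, as in the tests):

    def should_update_sketch(record, cutoff_time):
        # Hypothetical sketch, not the real should_update.
        # Missing coverage: always update.
        if record is None:
            return True
        # No cutoff configured: any existing coverage counts as current.
        if cutoff_time is None:
            return False
        # A timestamp strictly before the cutoff is outdated; a timestamp
        # at or exactly on the cutoff is up-to-date, which is why setting
        # record.timestamp = cutoff above yields False.
        return record.timestamp < cutoff_time

The same rule explains the first test: with cutoff_time equal to the record's timestamp, should_update is False, so the provider is never invoked and the batch reports an automatic success.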
Example #3
    def import_one_feed(self, feed, feed_url=None):
        imported_editions, pools, works, failures = self.importer.import_from_feed(
            feed,
            even_if_no_author=True,
            immediately_presentation_ready=self.immediately_presentation_ready,
            feed_url=feed_url)

        data_source = DataSource.lookup(self._db,
                                        self.importer.data_source_name)

        # Create CoverageRecords for the successful imports.
        for edition in imported_editions:
            record, ignore = CoverageRecord.add_for(edition,
                                                    data_source,
                                                    CoverageRecord.IMPORT_OPERATION,
                                                    status=CoverageRecord.SUCCESS)

        # Create CoverageRecords for the failures.
        for urn, failure in failures.items():
            failure.to_coverage_record(
                operation=CoverageRecord.IMPORT_OPERATION)
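
The failures dict maps identifier URNs to failure objects that know how to persist themselves. A sketch of what such an object's to_coverage_record could look like; the class and its attributes are invented here, and only the add_for signature and the status constants are taken from the surrounding examples:

    class CoverageFailureSketch(object):
        # Invented stand-in for the values stored in `failures`.
        def __init__(self, obj, data_source, exception, transient=True):
            self.obj = obj
            self.data_source = data_source
            self.exception = exception
            self.transient = transient

        def to_coverage_record(self, operation=None):
            # Persist the failure against the same data source and
            # operation used for the successful imports.
            record, ignore = CoverageRecord.add_for(
                self.obj, self.data_source, operation)
            record.exception = self.exception
            if self.transient:
                record.status = CoverageRecord.TRANSIENT_FAILURE
            else:
                record.status = CoverageRecord.PERSISTENT_FAILURE
            return record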
Example #4
    def test_follow_one_link(self):
        monitor = OPDSImportMonitor(self._db, "http://url",
                                    DataSource.OA_CONTENT_SERVER, OPDSImporter)
        feed = self.content_server_mini_feed

        # If there's new data, follow_one_link extracts the next links.

        http = DummyHTTPClient()
        http.queue_response(200, content=feed)

        next_links, content = monitor.follow_one_link("http://url",
                                                      do_get=http.do_get)

        eq_(1, len(next_links))
        eq_("http://localhost:5000/?after=327&size=100", next_links[0])

        eq_(feed, content)

        # Now import the editions and add coverage records.
        monitor.importer.import_from_feed(feed)
        eq_(2, self._db.query(Edition).count())

        editions = self._db.query(Edition).all()
        data_source = DataSource.lookup(self._db, DataSource.OA_CONTENT_SERVER)

        for edition in editions:
            record, ignore = CoverageRecord.add_for(
                edition, data_source, CoverageRecord.IMPORT_OPERATION)
            record.timestamp = datetime.datetime(2016, 1, 1, 1, 1, 1)

        # If there's no new data, follow_one_link returns no next links and no content.
        http.queue_response(200, content=feed)

        next_links, content = monitor.follow_one_link("http://url",
                                                      do_get=http.do_get)

        eq_(0, len(next_links))
        eq_(None, content)
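
Read as a contract, follow_one_link fetches the URL and, only when check_for_new_data reports changes, returns the feed's next links together with the raw content; otherwise it returns no links and no content so the crawl stops. A condensed sketch of that contract; the do_get signature mirrors the _get shown in the next example, and the feedparser-based link extraction is an assumption:

    import feedparser

    def follow_one_link_sketch(monitor, url, do_get):
        # Hypothetical sketch, not the real follow_one_link.
        status, headers, feed = do_get(url, {})
        if not monitor.check_for_new_data(feed):
            # Nothing new behind this link: stop crawling here.
            return [], None
        # New data: return the rel="next" links so the caller can keep
        # paging, along with the content to be imported.
        parsed = feedparser.parse(feed)
        next_links = [link['href']
                      for link in parsed['feed'].get('links', [])
                      if link.get('rel') == 'next']
        return next_links, feed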
Example #5
    def test_check_for_new_data(self):
        feed = self.content_server_mini_feed

        class MockOPDSImportMonitor(OPDSImportMonitor):
            def _get(self, url, headers):
                return 200, {}, feed

        monitor = OPDSImportMonitor(self._db, "http://url",
                                    DataSource.OA_CONTENT_SERVER, OPDSImporter)

        # Nothing has been imported yet, so all data is new.
        eq_(True, monitor.check_for_new_data(feed))

        # Now import the editions.
        monitor = MockOPDSImportMonitor(self._db, "http://url",
                                        DataSource.OA_CONTENT_SERVER,
                                        OPDSImporter)
        monitor.run_once("http://url", None)

        # Editions have been imported.
        eq_(2, self._db.query(Edition).count())

        # Note that unlike many other Monitors, OPDSImportMonitor
        # doesn't store a Timestamp.
        assert not hasattr(monitor, 'timestamp')

        editions = self._db.query(Edition).all()
        data_source = DataSource.lookup(self._db, DataSource.OA_CONTENT_SERVER)

        # If the CoverageRecords' timestamps are after the entries'
        # updated dates, there's nothing new.
        record, ignore = CoverageRecord.add_for(
            editions[0], data_source, CoverageRecord.IMPORT_OPERATION)
        record.timestamp = datetime.datetime(2016, 1, 1, 1, 1, 1)

        record2, ignore = CoverageRecord.add_for(
            editions[1], data_source, CoverageRecord.IMPORT_OPERATION)
        record2.timestamp = datetime.datetime(2016, 1, 1, 1, 1, 1)

        eq_(False, monitor.check_for_new_data(feed))

        # If the monitor is set up to force reimport, it doesn't
        # matter that there's nothing new--we act as though there is.
        monitor.force_reimport = True
        eq_(True, monitor.check_for_new_data(feed))
        monitor.force_reimport = False

        # If an entry was updated after the date given in that entry's
        # CoverageRecord, there's new data.
        record2.timestamp = datetime.datetime(1970, 1, 1, 1, 1, 1)
        eq_(True, monitor.check_for_new_data(feed))

        # If a CoverageRecord is a transient failure, we try again
        # regardless of whether it's been updated.
        for r in [record, record2]:
            r.timestamp = datetime.datetime(2016, 1, 1, 1, 1, 1)
            r.exception = "Failure!"
            r.status = CoverageRecord.TRANSIENT_FAILURE
        eq_(True, monitor.check_for_new_data(feed))

        # If a CoverageRecord is a persistent failure, we don't try again...
        for r in [record, record2]:
            r.status = CoverageRecord.PERSISTENT_FAILURE
        eq_(False, monitor.check_for_new_data(feed))

        # ...unless the feed updates.
        record.timestamp = datetime.datetime(1970, 1, 1, 1, 1, 1)
        eq_(True, monitor.check_for_new_data(feed))
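
Collecting these assertions gives a per-entry decision rule; check_for_new_data(feed) is then True if the rule fires for any entry in the feed. A sketch of that rule, assuming entry_updated is the entry's updated datetime and record is the matching CoverageRecord (or None):

    def entry_has_new_data_sketch(record, entry_updated, force_reimport=False):
        # Hypothetical sketch distilled from the assertions above.
        # Forced reimport overrides every other consideration.
        if force_reimport:
            return True
        # Never imported: the entry is new by definition.
        if record is None:
            return True
        # Transient failures are always retried.
        if record.status == CoverageRecord.TRANSIENT_FAILURE:
            return True
        # Otherwise (success or persistent failure), reimport only if the
        # entry was updated after the record's timestamp.
        return entry_updated > record.timestamp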