def test_run_on_specific_identifiers_respects_cutoff_time(self):
    """A CoverageRecord newer than cutoff_time suppresses re-coverage."""
    original_run = datetime.datetime(2016, 1, 1)

    # Pretend a past run already produced coverage for self.identifier.
    record, ignore = CoverageRecord.add_for(
        self.identifier, self.output_source)
    record.timestamp = original_run

    # This provider would fail persistently if it ever actually ran.
    provider = NeverSuccessfulCoverageProvider(
        "Persistent failure", self.input_identifier_types,
        self.output_source, cutoff_time=original_run)

    counts, records = provider.run_on_specific_identifiers(
        [self.identifier])
    successes, transient_failures, persistent_failures = counts

    # The provider was never invoked: the existing coverage record
    # was considered up-to-date, giving us an automatic success.
    eq_(1, successes)
    eq_(0, persistent_failures)
    eq_([], records)

    # Moving the cutoff forward makes that record stale, so the
    # provider really runs against self.identifier -- and fails.
    provider.cutoff_time = datetime.datetime(2016, 2, 1)
    counts, records = provider.run_on_specific_identifiers(
        [self.identifier])
    successes, transient_failures, persistent_failures = counts
    eq_(0, successes)
    eq_(1, persistent_failures)

    # The formerly successful CoverageRecord now records the failure.
    eq_(records[0], record)
    eq_("What did you expect?", record.exception)
def test_should_update(self):
    """should_update is True only for missing or stale coverage."""
    cutoff = datetime.datetime(2016, 1, 1)
    provider = AlwaysSuccessfulCoverageProvider(
        "Always successful", self.input_identifier_types,
        self.output_source, cutoff_time=cutoff)

    # No coverage at all -> update is needed.
    eq_(True, provider.should_update(None))

    # Coverage older than the cutoff -> update is needed.
    record, ignore = CoverageRecord.add_for(
        self.identifier, self.output_source)
    record.timestamp = datetime.datetime(2015, 1, 1)
    eq_(True, provider.should_update(record))

    # Coverage at the cutoff -> no update needed.
    record.timestamp = cutoff
    eq_(False, provider.should_update(record))
def import_one_feed(self, feed, feed_url=None):
    """Import a single OPDS feed and record coverage for each entry.

    :param feed: The feed content to import.
    :param feed_url: Optional URL the feed came from; passed through
        to the importer.
    """
    imported_editions, pools, works, failures = self.importer.import_from_feed(
        feed, even_if_no_author=True,
        immediately_presentation_ready=self.immediately_presentation_ready,
        feed_url=feed_url)

    data_source = DataSource.lookup(self._db, self.importer.data_source_name)

    # Create CoverageRecords for the successful imports.
    #
    # BUG FIX: add_for returns a (record, is_new) 2-tuple; the old
    # code bound the whole tuple to `record`. We only need the side
    # effect of the record being created, so unpack it the same way
    # every other call site does.
    for edition in imported_editions:
        record, ignore = CoverageRecord.add_for(
            edition, data_source, CoverageRecord.IMPORT_OPERATION,
            status=CoverageRecord.SUCCESS)

    # Create CoverageRecords for the failures. to_coverage_record
    # persists the failure record as a side effect; the return value
    # is deliberately ignored.
    for urn, failure in failures.items():
        failure.to_coverage_record(
            operation=CoverageRecord.IMPORT_OPERATION)
def test_follow_one_link(self):
    """follow_one_link returns next links and content only for new data."""
    monitor = OPDSImportMonitor(
        self._db, "http://url", DataSource.OA_CONTENT_SERVER, OPDSImporter)
    feed = self.content_server_mini_feed

    # When the feed contains new data, follow_one_link hands back
    # both the extracted "next" links and the feed content.
    http = DummyHTTPClient()
    http.queue_response(200, content=feed)
    next_links, content = monitor.follow_one_link(
        "http://url", do_get=http.do_get)

    eq_(1, len(next_links))
    eq_("http://localhost:5000/?after=327&size=100", next_links[0])
    eq_(feed, content)

    # Import the feed's editions and stamp each one with an
    # up-to-date coverage record.
    monitor.importer.import_from_feed(feed)
    eq_(2, self._db.query(Edition).count())

    editions = self._db.query(Edition).all()
    data_source = DataSource.lookup(self._db, DataSource.OA_CONTENT_SERVER)
    for edition in editions:
        record, ignore = CoverageRecord.add_for(
            edition, data_source, CoverageRecord.IMPORT_OPERATION)
        record.timestamp = datetime.datetime(2016, 1, 1, 1, 1, 1)

    # Now that nothing in the feed is new, follow_one_link yields
    # no next links and no content.
    http.queue_response(200, content=feed)
    next_links, content = monitor.follow_one_link(
        "http://url", do_get=http.do_get)
    eq_(0, len(next_links))
    eq_(None, content)
def test_check_for_new_data(self):
    """check_for_new_data compares feed dates against CoverageRecords."""
    feed = self.content_server_mini_feed

    class MockOPDSImportMonitor(OPDSImportMonitor):
        # Serve the canned feed instead of making a real request.
        def _get(self, url, headers):
            return 200, {}, feed

    monitor = OPDSImportMonitor(
        self._db, "http://url", DataSource.OA_CONTENT_SERVER, OPDSImporter)

    # Before any import happens, everything in the feed counts as new.
    eq_(True, monitor.check_for_new_data(feed))

    # Run an import through the mock monitor.
    monitor = MockOPDSImportMonitor(
        self._db, "http://url", DataSource.OA_CONTENT_SERVER, OPDSImporter)
    monitor.run_once("http://url", None)

    # Both editions were imported.
    eq_(2, self._db.query(Edition).count())

    # Unlike many other Monitors, OPDSImportMonitor doesn't store
    # a Timestamp.
    assert not hasattr(monitor, 'timestamp')

    editions = self._db.query(Edition).all()
    data_source = DataSource.lookup(self._db, DataSource.OA_CONTENT_SERVER)

    # With CoverageRecords dated after the entries' updated dates,
    # there is nothing new.
    record, ignore = CoverageRecord.add_for(
        editions[0], data_source, CoverageRecord.IMPORT_OPERATION)
    record.timestamp = datetime.datetime(2016, 1, 1, 1, 1, 1)

    record2, ignore = CoverageRecord.add_for(
        editions[1], data_source, CoverageRecord.IMPORT_OPERATION)
    record2.timestamp = datetime.datetime(2016, 1, 1, 1, 1, 1)
    eq_(False, monitor.check_for_new_data(feed))

    # force_reimport makes stale data look new anyway.
    monitor.force_reimport = True
    eq_(True, monitor.check_for_new_data(feed))
    monitor.force_reimport = False

    # An entry updated after its CoverageRecord's date means new data.
    record2.timestamp = datetime.datetime(1970, 1, 1, 1, 1, 1)
    eq_(True, monitor.check_for_new_data(feed))

    # Transient failures are retried regardless of how recently
    # the entry was updated.
    for failed_record in [record, record2]:
        failed_record.timestamp = datetime.datetime(2016, 1, 1, 1, 1, 1)
        failed_record.exception = "Failure!"
        failed_record.status = CoverageRecord.TRANSIENT_FAILURE
    eq_(True, monitor.check_for_new_data(feed))

    # Persistent failures are not retried...
    for failed_record in [record, record2]:
        failed_record.status = CoverageRecord.PERSISTENT_FAILURE
    eq_(False, monitor.check_for_new_data(feed))

    # ...unless the feed has been updated since the failure.
    record.timestamp = datetime.datetime(1970, 1, 1, 1, 1, 1)
    eq_(True, monitor.check_for_new_data(feed))