def test_coverage_record(self):
    """Applying Metadata keeps the edition's CoverageRecord timestamp in
    step with data_source_last_updated, and older metadata is ignored
    unless the apply is forced.
    """
    edition, _pool = self._edition(with_license_pool=True)
    source = edition.data_source

    def current_record():
        # Look the record up afresh so each check sees the state left
        # behind by the most recent apply().
        return CoverageRecord.lookup(edition, source)

    # Before any metadata is applied there is no coverage record.
    eq_(current_record(), None)

    newer_update = datetime.datetime(2015, 1, 1)
    fresh_metadata = Metadata(
        data_source=source,
        title=u"New title",
        data_source_last_updated=newer_update,
    )
    fresh_metadata.apply(edition)

    # The first apply records the update time and changes the title.
    eq_(newer_update, current_record().timestamp)
    eq_(u"New title", edition.title)

    older_update = datetime.datetime(2014, 1, 1)
    stale_metadata = Metadata(
        data_source=source,
        title=u"Another new title",
        data_source_last_updated=older_update,
    )

    # Metadata older than the existing coverage changes nothing.
    stale_metadata.apply(edition)
    eq_(u"New title", edition.title)
    eq_(newer_update, current_record().timestamp)

    # Forcing the apply overrides the date check: the title changes and
    # the coverage timestamp moves back to the older date.
    stale_metadata.apply(edition, force=True)
    eq_(u"Another new title", edition.title)
    eq_(older_update, current_record().timestamp)
def test_import_one_feed(self):
    """import_one_feed() creates a CoverageRecord for each outcome:
    success, transient failure and persistent failure.
    """
    monitor = OPDSImportMonitor(
        self._db,
        "http://url",
        DataSource.OA_CONTENT_SERVER,
        DoomedOPDSImporter,
    )
    data_source = DataSource.lookup(self._db, DataSource.OA_CONTENT_SERVER)

    monitor.import_one_feed(self.content_server_mini_feed, "http://root-url/")

    # Exactly one edition came out of the import.
    editions = self._db.query(Edition).all()
    eq_(1, len(editions))
    [edition] = editions
    primary = edition.primary_identifier

    # Its identifier carries a successful import CoverageRecord.
    record = CoverageRecord.lookup(
        primary, data_source, operation=CoverageRecord.IMPORT_OPERATION
    )
    eq_(CoverageRecord.SUCCESS, record.status)
    eq_(None, record.exception)

    # The identifier has a single cover link, and its relative URL was
    # resolved against the root URL handed to import_one_feed.
    cover_urls = [
        link.resource.url
        for link in primary.links
        if link.rel == Hyperlink.IMAGE
    ]
    [cover] = cover_urls
    eq_("http://root-url/full-cover-image.png", cover)

    # The 202 status message in the feed produced a transient failure,
    # and the exception produced a persistent failure.
    is_import = CoverageRecord.operation == CoverageRecord.IMPORT_OPERATION
    not_success = CoverageRecord.status != CoverageRecord.SUCCESS
    unsuccessful = self._db.query(CoverageRecord).filter(is_import, not_success)

    expected_statuses = sorted([
        CoverageRecord.TRANSIENT_FAILURE,
        CoverageRecord.PERSISTENT_FAILURE,
    ])
    eq_(expected_statuses, sorted(r.status for r in unsuccessful))

    # The failure record for this identifier keeps the exception text.
    identifier, _ = Identifier.parse_urn(
        self._db,
        "urn:librarysimplified.org/terms/id/Gutenberg%20ID/10441",
    )
    failure = CoverageRecord.lookup(
        identifier, data_source, operation=CoverageRecord.IMPORT_OPERATION
    )
    assert "Utter failure!" in failure.exception
def check_for_new_data(self, feed):
    """Report whether `feed` has at least one entry worth importing.

    Entries are examined in the order the importer yields them, and the
    first one that counts as new ends the scan. An entry counts as new
    when:

    * it has no import CoverageRecord (or the record has no timestamp),
    * the previous import attempt ended in a transient failure,
    * the remote gives no update date for it, or
    * the remote update date is at or after the record's timestamp.

    If force_reimport is set, the feed is always treated as new and no
    entry is examined.

    :param feed: The feed, passed through to the importer's
        extract_last_update_dates().
    :return: True if something should be imported, False otherwise.
    """
    if self.force_reimport:
        # A forced reimport treats every feed as containing new data.
        return True

    for urn, remote_updated in self.importer.extract_last_update_dates(feed):
        identifier, _ = Identifier.parse_urn(self._db, urn)
        data_source = self.importer.data_source

        record = None
        if identifier:
            record = CoverageRecord.lookup(
                identifier,
                data_source,
                operation=CoverageRecord.IMPORT_OPERATION,
            )

        # A transient failure is retried whether or not the feed changed.
        if record and record.status == CoverageRecord.TRANSIENT_FAILURE:
            self.log.info(
                "Counting %s as new because previous attempt resulted in transient failure: %s",
                record.identifier, record.exception
            )
            return True

        # No recorded attempt to import this entry.
        if not (record and record.timestamp):
            self.log.info(
                "Counting %s as new because it has no CoverageRecord.",
                identifier
            )
            return True

        # From here on the last attempt was a success or a persistent
        # failure, so only a change on the remote side justifies another
        # import.
        if not remote_updated:
            # The remote gives no update date; import again to be safe.
            self.log.info(
                "Counting %s as new because remote has no information about when it was updated.",
                record.identifier
            )
            return True

        if remote_updated >= record.timestamp:
            # Updated remotely at or after our last coverage.
            self.log.info(
                "Counting %s as new because its coverage date is %s and remote has %s.",
                record.identifier, record.timestamp, remote_updated
            )
            return True

    # Every entry is already covered and unchanged.
    return False