示例#1
0
    def test_coverage_record(self):
        """Check how Metadata.apply() maintains an Edition's CoverageRecord.

        The assertions below expect the record's timestamp to track
        `data_source_last_updated`, expect metadata older than the
        existing record to be ignored, and expect `force=True` to
        apply it anyway.
        """
        newer_timestamp = datetime.datetime(2015, 1, 1)
        stale_timestamp = datetime.datetime(2014, 1, 1)

        edition, _pool = self._edition(with_license_pool=True)
        source = edition.data_source

        # A freshly created edition has no coverage record yet.
        eq_(CoverageRecord.lookup(edition, source), None)

        # Applying metadata creates a record stamped with the date the
        # data source says it was last updated.
        fresh_metadata = Metadata(
            data_source=source,
            title=u"New title",
            data_source_last_updated=newer_timestamp,
        )
        fresh_metadata.apply(edition)

        record = CoverageRecord.lookup(edition, source)
        eq_(newer_timestamp, record.timestamp)
        eq_(u"New title", edition.title)

        # Metadata that predates the existing record leaves both the
        # edition and the record untouched.
        stale_metadata = Metadata(
            data_source=source,
            title=u"Another new title",
            data_source_last_updated=stale_timestamp,
        )
        stale_metadata.apply(edition)
        eq_(u"New title", edition.title)

        record = CoverageRecord.lookup(edition, source)
        eq_(newer_timestamp, record.timestamp)

        # With force=True the stale metadata is applied regardless, and
        # the record's timestamp moves back to the older date.
        stale_metadata.apply(edition, force=True)
        eq_(u"Another new title", edition.title)
        record = CoverageRecord.lookup(edition, source)
        eq_(stale_timestamp, record.timestamp)
示例#2
0
    def test_import_one_feed(self):
        """import_one_feed() should leave a CoverageRecord for each outcome.

        The test expects one successful import, plus one transient and
        one persistent failure recorded for the import operation.
        """
        import_operation = CoverageRecord.IMPORT_OPERATION

        monitor = OPDSImportMonitor(
            self._db,
            "http://url",
            DataSource.OA_CONTENT_SERVER,
            DoomedOPDSImporter,
        )
        source = DataSource.lookup(self._db, DataSource.OA_CONTENT_SERVER)

        monitor.import_one_feed(
            self.content_server_mini_feed, "http://root-url/"
        )

        # Exactly one edition should have made it into the database.
        editions = self._db.query(Edition).all()
        eq_(1, len(editions))
        [edition] = editions
        primary_identifier = edition.primary_identifier

        # The import of that edition was recorded as a success.
        success = CoverageRecord.lookup(
            primary_identifier, source, operation=import_operation
        )
        eq_(CoverageRecord.SUCCESS, success.status)
        eq_(None, success.exception)

        # The cover link's relative URL was resolved against the root
        # URL handed to import_one_feed.
        [cover_url] = [
            link.resource.url
            for link in primary_identifier.links
            if link.rel == Hyperlink.IMAGE
        ]
        eq_("http://root-url/full-cover-image.png", cover_url)

        # The 202 status message in the feed is expected to show up as
        # a transient failure, and the exception as a persistent one.
        expected_statuses = sorted([
            CoverageRecord.TRANSIENT_FAILURE,
            CoverageRecord.PERSISTENT_FAILURE,
        ])
        unsuccessful = self._db.query(CoverageRecord).filter(
            CoverageRecord.operation == import_operation,
            CoverageRecord.status != CoverageRecord.SUCCESS,
        )
        actual_statuses = sorted([record.status for record in unsuccessful])
        eq_(expected_statuses, actual_statuses)

        # The failure record for this identifier carries the exception text.
        doomed_identifier, _ = Identifier.parse_urn(
            self._db,
            "urn:librarysimplified.org/terms/id/Gutenberg%20ID/10441",
        )
        failure = CoverageRecord.lookup(
            doomed_identifier, source, operation=import_operation
        )
        assert "Utter failure!" in failure.exception
示例#3
0
    def check_for_new_data(self, feed):
        """Decide whether `feed` contains anything that should be imported.

        Entries are examined in the order returned by
        `self.importer.extract_last_update_dates`, and the scan stops at
        the first entry that counts as new.

        :param feed: The feed, handed unchanged to
            `self.importer.extract_last_update_dates`.
        :return: True if `self.force_reimport` is set or at least one
            entry counts as new; False otherwise.
        """
        # A forced reimport treats every feed as new, so there is
        # nothing to inspect.
        if self.force_reimport:
            return True

        for urn, remote_updated in self.importer.extract_last_update_dates(feed):
            identifier, _ = Identifier.parse_urn(self._db, urn)
            data_source = self.importer.data_source

            # Only look for a previous import attempt when the URN
            # produced an identifier.
            record = None
            if identifier:
                record = CoverageRecord.lookup(
                    identifier,
                    data_source,
                    operation=CoverageRecord.IMPORT_OPERATION,
                )

            # A transient failure is always worth retrying, whether or
            # not the feed has changed since.
            if record and record.status == CoverageRecord.TRANSIENT_FAILURE:
                self.log.info(
                    "Counting %s as new because previous attempt resulted in transient failure: %s",
                    record.identifier, record.exception
                )
                return True

            # No timestamped record means no known earlier attempt.
            if not (record and record.timestamp):
                self.log.info(
                    "Counting %s as new because it has no CoverageRecord.",
                    identifier
                )
                return True

            # From here on the last attempt was a success or a
            # persistent failure; reimport only if something may have
            # changed since then.
            if not remote_updated:
                # The remote gives no update date, so import again to
                # be safe.
                self.log.info(
                    "Counting %s as new because remote has no information about when it was updated.",
                    record.identifier
                )
                return True

            if remote_updated >= record.timestamp:
                # The remote copy is at least as recent as our record.
                self.log.info(
                    "Counting %s as new because its coverage date is %s and remote has %s.",
                    record.identifier, record.timestamp, remote_updated
                )
                return True

        # Every entry was already covered and unchanged.
        return False