Exemplo n.º 1
0
    def __init__(self, _db, lookup=None, input_identifier_types=None,
                 operation=None, **kwargs):
        """Set up a coverage provider backed by the Metadata Wrangler.

        :param _db: Database session; used here to look up the
            Metadata Wrangler DataSource.
        :param lookup: OPDS lookup client. When falsy, one is built
            with SimplifiedOPDSLookup.from_config().
        :param input_identifier_types: Identifier types to cover. When
            falsy, defaults to Overdrive, ThreeM, Gutenberg and
            Axis 360.
        :param operation: Operation handed to the parent constructor.
            When falsy, self.OPERATION is used.
        :param kwargs: Forwarded unchanged to the parent constructor.
        """
        identifier_types = input_identifier_types or [
            Identifier.OVERDRIVE_ID,
            Identifier.THREEM_ID,
            Identifier.GUTENBERG_ID,
            Identifier.AXIS_360_ID,
        ]
        wrangler_source = DataSource.lookup(
            _db, DataSource.METADATA_WRANGLER)
        opds_lookup = lookup or SimplifiedOPDSLookup.from_config()

        parent = super(MetadataWranglerCoverageProvider, self)
        parent.__init__(
            lookup=opds_lookup,
            service_name=self.SERVICE_NAME,
            input_identifier_types=identifier_types,
            output_source=wrangler_source,
            operation=operation or self.OPERATION,
            **kwargs
        )

        # Nothing more to do when the lookup client is authenticated.
        if self.lookup.authenticated:
            return
        self.log.warn(
            "Authentication for the Library Simplified Metadata Wrangler "
            "is not set up. You can still use the metadata wrangler, but "
            "it will not know which collection you're asking about."
        )
Exemplo n.º 2
0
 def __init__(self, collection, lookup_client, *args, **kwargs):
     """Initialize the provider, defaulting the lookup client if needed.

     :param collection: Passed through to the parent constructor.
     :param lookup_client: OPDS lookup client. When falsy, a
         SimplifiedOPDSLookup is built from the configured content
         server integration URL.
     :param args: Extra positional arguments for the parent constructor.
     :param kwargs: Extra keyword arguments for the parent constructor.
     """
     client = lookup_client
     if not client:
         server_url = Configuration.integration_url(
             Configuration.CONTENT_SERVER_INTEGRATION)
         client = SimplifiedOPDSLookup(server_url)
     parent = super(ContentServerBibliographicCoverageProvider, self)
     parent.__init__(collection, client, *args, **kwargs)
Exemplo n.º 3
0
    def __init__(self, _db, lookup=None, input_identifier_types=None,
                 operation=None, input_identifiers=None, **kwargs):
        """Set up a coverage provider backed by the Metadata Wrangler.

        :param _db: Database session; used here to look up the
            Metadata Wrangler DataSource.
        :param lookup: OPDS lookup client. When falsy, one is built
            with SimplifiedOPDSLookup.from_config().
        :param input_identifier_types: Identifier types to cover. When
            falsy, defaults to Overdrive, ThreeM, Gutenberg, Axis 360
            and OneClick.
        :param operation: Operation handed to the parent constructor.
            When falsy, self.OPERATION is used.
        :param input_identifiers: Stored as-is on
            self.input_identifiers after the parent is initialized.
        :param kwargs: Forwarded unchanged to the parent constructor.
        """
        identifier_types = input_identifier_types or [
            Identifier.OVERDRIVE_ID,
            Identifier.THREEM_ID,
            Identifier.GUTENBERG_ID,
            Identifier.AXIS_360_ID,
            Identifier.ONECLICK_ID,
        ]
        wrangler_source = DataSource.lookup(
            _db, DataSource.METADATA_WRANGLER)
        opds_lookup = lookup or SimplifiedOPDSLookup.from_config()

        parent = super(MetadataWranglerCoverageProvider, self)
        parent.__init__(
            lookup=opds_lookup,
            service_name=self.SERVICE_NAME,
            input_identifier_types=identifier_types,
            output_source=wrangler_source,
            operation=operation or self.OPERATION,
            **kwargs
        )

        self.input_identifiers = input_identifiers

        # Nothing more to do when the lookup client is authenticated.
        if self.lookup.authenticated:
            return
        self.log.warn(
            "Authentication for the Library Simplified Metadata Wrangler "
            "is not set up. You can still use the metadata wrangler, but "
            "it will not know which collection you're asking about."
        )
Exemplo n.º 4
0
    def __init__(self, _db, input_identifier_types=None, metadata_lookup=None,
                 cutoff_time=None, operation=None):
        """Set up a coverage provider backed by the Metadata Wrangler.

        :param _db: Database session, stored on self._db.
        :param input_identifier_types: Identifier types to cover. When
            falsy, defaults to Overdrive, ThreeM, Gutenberg and
            Axis 360.
        :param metadata_lookup: OPDS lookup client, stored on
            self.lookup. When falsy, one is built with
            SimplifiedOPDSLookup.from_config().
        :param cutoff_time: Passed through to the parent constructor.
        :param operation: Stored on self.operation. When falsy,
            CoverageRecord.SYNC_OPERATION is used.
        """
        self._db = _db
        identifier_types = input_identifier_types or [
            Identifier.OVERDRIVE_ID,
            Identifier.THREEM_ID,
            Identifier.GUTENBERG_ID,
            Identifier.AXIS_360_ID,
        ]
        self.output_source = DataSource.lookup(
            self._db, DataSource.METADATA_WRANGLER)

        self.lookup = metadata_lookup or SimplifiedOPDSLookup.from_config()
        self.operation = operation or CoverageRecord.SYNC_OPERATION

        parent = super(MetadataWranglerCoverageProvider, self)
        parent.__init__(
            self.service_name,
            identifier_types,
            self.output_source,
            workset_size=20,
            cutoff_time=cutoff_time,
            operation=self.operation,
        )
Exemplo n.º 5
0
 def __init__(self, metadata_web_app_url=None):
     """Record the metadata URL and build a lookup client for it.

     :param metadata_web_app_url: URL to use. When falsy, the
         configured Metadata Wrangler integration URL is used instead.
     """
     wrangler_url = metadata_web_app_url
     if not wrangler_url:
         wrangler_url = Configuration.integration_url(
             Configuration.METADATA_WRANGLER_INTEGRATION)
     self.metadata_url = wrangler_url
     self.lookup = SimplifiedOPDSLookup(self.metadata_url)
Exemplo n.º 6
0
 def __init__(self, _db, interval_seconds=None):
     """Initialize the monitor and the two clients it uses.

     :param _db: Database session, given to the parent constructor
         and to ThreeMAPI.
     :param interval_seconds: Passed positionally to the parent
         constructor.
     """
     monitor_name = (
         "20150929 migration - Set delivery mechanism for 3M books")
     parent = super(SetDeliveryMechanismMonitor, self)
     parent.__init__(_db, monitor_name, interval_seconds, batch_size=10)

     self.api = ThreeMAPI(_db)
     # The metadata service URL is hard-coded here.
     wrangler_url = "http://metadata.alpha.librarysimplified.org/"
     self.metadata_client = SimplifiedOPDSLookup(wrangler_url)
Exemplo n.º 7
0
 def __init__(self, _db, service_name=None, lookup=None, **kwargs):
     """Initialize a coverage provider for the OA content server.

     :param _db: Database session; used here to look up the
         OA content server DataSource.
     :param service_name: Name given to the parent constructor. When
         falsy, self.DEFAULT_SERVICE_NAME is used.
     :param lookup: OPDS lookup client. When falsy, a
         SimplifiedOPDSLookup is built from the configured content
         server integration URL.
     :param kwargs: Forwarded to the parent constructor. Any
         'input_identifier_types' entry is overwritten with None.
     """
     name = service_name or self.DEFAULT_SERVICE_NAME

     client = lookup
     if not client:
         server_url = Configuration.integration_url(
             Configuration.CONTENT_SERVER_INTEGRATION)
         client = SimplifiedOPDSLookup(server_url)

     content_server_source = DataSource.lookup(
         _db, DataSource.OA_CONTENT_SERVER)

     # Whatever the caller supplied for this key is discarded.
     kwargs['input_identifier_types'] = None

     parent = super(ContentServerBibliographicCoverageProvider, self)
     parent.__init__(
         name,
         output_source=content_server_source,
         lookup=client,
         expect_license_pool=True,
         presentation_ready_on_success=True,
         **kwargs
     )
Exemplo n.º 8
0
 def __init__(self,
              _db,
              name="Axis 360 Circulation Monitor",
              interval_seconds=60,
              batch_size=50,
              api=None):
     """Initialize the monitor and the clients it depends on.

     :param _db: Database session, passed to the parent constructor.
     :param name: Monitor name, passed to the parent constructor.
     :param interval_seconds: Passed to the parent constructor.
     :param batch_size: Stored on self.batch_size.
     :param api: Axis 360 API client. When falsy, one is built with
         Axis360API.from_environment().
     """
     super(Axis360CirculationMonitor, self).__init__(
         _db,
         name,
         interval_seconds=interval_seconds,
         default_start_time=self.VERY_LONG_AGO)
     self.batch_size = batch_size
     metadata_wrangler_url = Configuration.integration_url(
         Configuration.METADATA_WRANGLER_INTEGRATION)
     if metadata_wrangler_url:
         self.metadata_wrangler = SimplifiedOPDSLookup(
             metadata_wrangler_url)
     else:
         # This should only happen during a test.
         self.metadata_wrangler = None
     self.api = api or Axis360API.from_environment(self._db)
     # Pass the resolved client (self.api), not the raw `api` argument:
     # when the caller gave no client, `api` is None and the coverage
     # provider would not share this monitor's API instance.
     self.bibliographic_coverage_provider = (
         Axis360BibliographicCoverageProvider(
             self._db, axis_360_api=self.api))
Exemplo n.º 9
0
class CreateWorksForIdentifiersScript(Script):

    """Do the bare minimum to associate each Identifier with an Edition
    with title and author, so that we can calculate a permanent work
    ID.
    """
    # Identifier types considered when looking for Identifiers that
    # have no Edition at all.
    to_check = [Identifier.OVERDRIVE_ID, Identifier.THREEM_ID,
                Identifier.GUTENBERG_ID]
    # Maximum number of Identifiers sent to the lookup service at once.
    BATCH_SIZE = 100
    name = "Create works for identifiers"

    def __init__(self, metadata_web_app_url=None):
        """Record the metadata URL and build a lookup client for it.

        :param metadata_web_app_url: URL to use. When falsy, the
            configured Metadata Wrangler integration URL is used.
        """
        self.metadata_url = (
            metadata_web_app_url or Configuration.integration_url(
                Configuration.METADATA_WRANGLER_INTEGRATION
            )
        )
        self.lookup = SimplifiedOPDSLookup(self.metadata_url)

    def run(self):
        """Look up, in batches, every Identifier that needs an Edition
        or whose Edition is missing its title or author.
        """

        # We will try to fill in Editions that are missing
        # title/author and as such have no permanent work ID.
        #
        # We will also try to create Editions for Identifiers that
        # have no Edition.

        # NOTE: `== None` is deliberate here. These look like SQLAlchemy
        # column expressions, where `==` builds a query clause and
        # `is None` would not.
        either_title_or_author_missing = or_(
            Edition.title == None,
            Edition.sort_author == None,
        )
        edition_missing_title_or_author = self._db.query(Identifier).join(
            Identifier.primarily_identifies).filter(
                either_title_or_author_missing)

        no_edition = self._db.query(Identifier).filter(
            Identifier.primarily_identifies==None).filter(
                Identifier.type.in_(self.to_check))

        for q, descr in (
                (edition_missing_title_or_author,
                 "identifiers whose edition is missing title or author"),
                (no_edition, "identifiers with no edition")):
            batch = []
            self.log.debug("Trying to fix %d %s", q.count(), descr)
            for i in q:
                batch.append(i)
                if len(batch) >= self.BATCH_SIZE:
                    self.process_batch(batch)
                    batch = []
            # Flush the final partial batch. Without this, up to
            # BATCH_SIZE - 1 identifiers per query were silently skipped.
            if batch:
                self.process_batch(batch)

    def process_batch(self, batch):
        """Look up one batch of Identifiers and import the OPDS feed
        that comes back, then commit.

        :param batch: Identifiers to hand to self.lookup.lookup().
        :raises Exception: If the response status is not 200, or if
            its content type is not the OPDS acquisition feed type.
        """
        response = self.lookup.lookup(batch)

        if response.status_code != 200:
            raise Exception(response.text)

        content_type = response.headers['content-type']
        if content_type != OPDSFeed.ACQUISITION_FEED_TYPE:
            raise Exception("Wrong media type: %s" % content_type)

        importer = OPDSImporter(
            self._db, response.text,
            overwrite_rels=[Hyperlink.DESCRIPTION, Hyperlink.IMAGE])
        imported, messages_by_id = importer.import_from_feed()
        self.log.info("%d successes, %d failures.",
                      len(imported), len(messages_by_id))
        self._db.commit()
import sys

bin_dir = os.path.split(__file__)[0]
package_dir = os.path.join(bin_dir, "..")
sys.path.append(os.path.abspath(package_dir))
from core.model import (
    production_session,
    DataSource,
    Work,
    Edition,
)
from overdrive import (OverdriveAPI, OverdriveRepresentationExtractor)
from threem import ThreeMAPI
from core.opds_import import SimplifiedOPDSLookup

# Lookup client pointed at a hard-coded metadata service URL.
lookup = SimplifiedOPDSLookup("http://metadata.alpha.librarysimplified.org/")

# One database session, shared by both API clients below.
_db = production_session()
overdrive = OverdriveAPI(_db)
threem = ThreeMAPI(_db)

# Select Editions whose data source is Overdrive and whose author is the
# empty string. NOTE(review): only Overdrive is listed in this filter, so
# the non-Overdrive branch of the loop below looks unreachable -- confirm.
q = _db.query(Edition).join(Edition.data_source).filter(
    DataSource.name.in_([DataSource.OVERDRIVE])).filter(Edition.author == '')
print "Fixing %s books." % q.count()
for edition in q:
    if edition.data_source.name == DataSource.OVERDRIVE:
        data = overdrive.metadata_lookup(edition.primary_identifier)
        metadata = OverdriveRepresentationExtractor.book_info_to_metadata(data)
    else:
        metadata = threem.bibliographic_lookup(edition.primary_identifier)
    metadata.update_contributions(_db,
Exemplo n.º 11
0
 def __init__(self, metadata_web_app_url=None):
     """Record the metadata URL and build a lookup client for it.

     :param metadata_web_app_url: URL to use. When falsy, the
         configured Metadata Wrangler integration URL is used instead.
     """
     if metadata_web_app_url:
         chosen_url = metadata_web_app_url
     else:
         chosen_url = Configuration.integration_url(
             Configuration.METADATA_WRANGLER_INTEGRATION)
     self.metadata_url = chosen_url
     self.lookup = SimplifiedOPDSLookup(self.metadata_url)
Exemplo n.º 12
0
class CreateWorksForIdentifiersScript(Script):
    """Do the bare minimum to associate each Identifier with an Edition
    with title and author, so that we can calculate a permanent work
    ID.
    """
    # Identifier types considered when looking for Identifiers that
    # have no Edition at all.
    to_check = [
        Identifier.OVERDRIVE_ID, Identifier.THREEM_ID, Identifier.GUTENBERG_ID
    ]
    # Maximum number of Identifiers sent to the lookup service at once.
    BATCH_SIZE = 100
    name = "Create works for identifiers"

    def __init__(self, metadata_web_app_url=None):
        """Record the metadata URL and build a lookup client for it.

        :param metadata_web_app_url: URL to use. When falsy, the
            configured Metadata Wrangler integration URL is used.
        """
        self.metadata_url = (metadata_web_app_url
                             or Configuration.integration_url(
                                 Configuration.METADATA_WRANGLER_INTEGRATION))
        self.lookup = SimplifiedOPDSLookup(self.metadata_url)

    def run(self):
        """Look up, in batches, every Identifier that needs an Edition
        or whose Edition is missing its title or author.
        """

        # We will try to fill in Editions that are missing
        # title/author and as such have no permanent work ID.
        #
        # We will also try to create Editions for Identifiers that
        # have no Edition.

        # NOTE: `== None` is deliberate here. These look like SQLAlchemy
        # column expressions, where `==` builds a query clause and
        # `is None` would not.
        either_title_or_author_missing = or_(
            Edition.title == None,
            Edition.sort_author == None,
        )
        edition_missing_title_or_author = self._db.query(Identifier).join(
            Identifier.primarily_identifies).filter(
                either_title_or_author_missing)

        no_edition = self._db.query(Identifier).filter(
            Identifier.primarily_identifies == None).filter(
                Identifier.type.in_(self.to_check))

        for q, descr in ((
                edition_missing_title_or_author,
                "identifiers whose edition is missing title or author"),
                         (no_edition, "identifiers with no edition")):
            batch = []
            self.log.debug("Trying to fix %d %s", q.count(), descr)
            for i in q:
                batch.append(i)
                if len(batch) >= self.BATCH_SIZE:
                    self.process_batch(batch)
                    batch = []
            # Flush the final partial batch. Without this, up to
            # BATCH_SIZE - 1 identifiers per query were silently skipped.
            if batch:
                self.process_batch(batch)

    def process_batch(self, batch):
        """Look up one batch of Identifiers and import the OPDS feed
        that comes back, then commit.

        :param batch: Identifiers to hand to self.lookup.lookup().
        :raises Exception: If the response status is not 200, or if
            its content type is not the OPDS acquisition feed type.
        """
        response = self.lookup.lookup(batch)

        if response.status_code != 200:
            raise Exception(response.text)

        content_type = response.headers['content-type']
        if content_type != OPDSFeed.ACQUISITION_FEED_TYPE:
            raise Exception("Wrong media type: %s" % content_type)

        importer = OPDSImporter(
            self._db,
            response.text,
            overwrite_rels=[Hyperlink.DESCRIPTION, Hyperlink.IMAGE])
        imported, messages_by_id = importer.import_from_feed()
        self.log.info("%d successes, %d failures.", len(imported),
                      len(messages_by_id))
        self._db.commit()
Exemplo n.º 13
0
 def _lookup_client(self, root):
     """Build a SimplifiedOPDSLookup for the given root.

     :param root: Passed through as the sole constructor argument.
     :return: The newly created SimplifiedOPDSLookup.
     """
     client = SimplifiedOPDSLookup(root)
     return client