def process_batch(self, items):
    """Look up open-access download URLs for a batch of items.

    Sends the batch's primary identifiers to the open-access content
    server, imports the resulting OPDS feed, and returns a mixed list:
    the primary identifier of each edition that came back with an
    open-access download URL, plus a CoverageFailure for each edition
    acknowledged without a URL and for each import-level failure
    message.
    """
    identifiers = [item.primary_identifier for item in items]
    response = self.content_lookup.lookup(identifiers)

    importer = OPDSImporter(self._db, DataSource.OA_CONTENT_SERVER)
    imported, messages_by_id, next_links = importer.import_from_feed(
        response.content
    )

    results = []

    # Handle the successes and seeming successes.
    for edition in imported:
        url = edition.open_access_download_url
        if url:
            self.log.info(
                "Successfully located open access download ID for %r: %s",
                edition, url
            )
            results.append(edition.primary_identifier)
        else:
            # The server claimed to know the book but gave us nothing
            # to download -- treat as a permanent failure.
            exception = "Open access content server acknowledged book but gave no open-access download URL."
            results.append(
                CoverageFailure(
                    self, edition, exception=exception, transient=False
                )
            )

    # Handle the outright failures.
    results.extend(self.handle_import_messages(messages_by_id))
    return results
def import_feed_response(self, response, id_mapping):
    """Confirm an OPDS feed response's content type, then import the feed."""
    self.check_content_type(response)
    source_name = self.output_source.name
    importer = OPDSImporter(
        self._db,
        identifier_mapping=id_mapping,
        data_source_name=source_name,
    )
    return importer.import_from_feed(response.text)
def process_feed_response(self, response, id_mapping):
    """Extract status messages from an OPDS feed response."""
    source_name = self.data_source.name
    importer = OPDSImporter(
        self._db,
        self.collection,
        data_source_name=source_name,
        identifier_mapping=id_mapping,
    )
    feed_root = etree.parse(StringIO(response.text))
    return importer.extract_messages(OPDSXMLParser(), feed_root)
def process_feed_response(self, response, id_mapping):
    """Confirm an OPDS feed response's content type and extract messages."""
    self.check_content_type(response)
    importer = OPDSImporter(
        self._db,
        data_source_name=self.output_source.name,
        identifier_mapping=id_mapping,
    )
    feed_root = etree.parse(StringIO(response.text))
    parser = OPDSXMLParser()
    return importer.extract_messages(parser, feed_root)
def import_feed_response(self, response, id_mapping=None):
    """Confirm an OPDS feed response and import the feed.

    :param response: An HTTP response object from the OPDS server.
    :param id_mapping: Optional identifier mapping passed through to
        OPDSImporter. Previously this name was read without ever being
        defined (it was neither a parameter nor a local), so any call
        that passed the status/media-type checks raised NameError.
        Adding it as a keyword argument with a None default fixes the
        crash and matches the signature used by sibling
        import_feed_response implementations.
    :raises HTTPIntegrationException: If the response has a non-200
        status code or the wrong media type.
    """
    if response.status_code != 200:
        self.log.error("BAD RESPONSE CODE: %s", response.status_code)
        raise HTTPIntegrationException(response.text)
    content_type = response.headers['content-type']
    if content_type != OPDSFeed.ACQUISITION_FEED_TYPE:
        raise HTTPIntegrationException("Wrong media type: %s" % content_type)
    importer = OPDSImporter(self._db, identifier_mapping=id_mapping)
    return importer.import_from_feed(response.text)
def import_feed_response(self, response, id_mapping):
    """Confirm an OPDS feed response's media type, then import the feed."""
    media_type = response.headers['content-type']
    if media_type != OPDSFeed.ACQUISITION_FEED_TYPE:
        # Anything other than an acquisition feed is a bad response.
        raise BadResponseException.from_response(
            response.url,
            "Wrong media type: %s" % media_type,
            response,
        )
    importer = OPDSImporter(self._db, identifier_mapping=id_mapping)
    return importer.import_from_feed(response.text)
def _detail_for_elementtree_entry(cls, parser, entry_tag, feed_url=None):
    """Augment the standard OPDS entry details with ODL license formats.

    Delegates to OPDSImporter for the base entry details, then reads
    each odl:license tag's dcterms:format and odl:protection subtags
    to build FormatData objects, which are appended to the entry's
    circulation formats.
    """
    subtag = parser.text_of_optional_subtag
    data = OPDSImporter._detail_for_elementtree_entry(
        parser, entry_tag, feed_url)

    formats = []
    for license_tag in parser._xpath(entry_tag, 'odl:license') or []:
        medium = subtag(license_tag, 'dcterms:format')
        protection_tags = (
            parser._xpath(license_tag, 'odl:protection') or []
        )
        schemes = [
            subtag(protection_tag, 'dcterms:format')
            for protection_tag in protection_tags
        ]
        # No protection tags -> a single format with no DRM scheme;
        # otherwise one format per declared scheme.
        for scheme in (schemes or [None]):
            formats.append(
                FormatData(
                    content_type=medium,
                    drm_scheme=scheme,
                    rights_uri=RightsStatus.IN_COPYRIGHT,
                )
            )

    # Replace a missing/falsy circulation dict, then make sure it has
    # a formats list before extending it.
    if not data.get('circulation'):
        data['circulation'] = dict()
    if not data['circulation'].get('formats'):
        data['circulation']['formats'] = []
    data['circulation']['formats'].extend(formats)
    return data
def process_batch(self, batch):
    """Look up a batch of identifiers and import the resulting feed.

    Raises a plain Exception on a non-200 response or an unexpected
    media type; on success, logs the import tallies and commits.
    """
    response = self.lookup.lookup(batch)
    if response.status_code != 200:
        raise Exception(response.text)
    media_type = response.headers['content-type']
    if media_type != OPDSFeed.ACQUISITION_FEED_TYPE:
        raise Exception("Wrong media type: %s" % media_type)

    importer = OPDSImporter(
        self._db,
        response.text,
        overwrite_rels=[Hyperlink.DESCRIPTION, Hyperlink.IMAGE],
    )
    imported, messages_by_id = importer.import_from_feed()
    self.log.info(
        "%d successes, %d failures.",
        len(imported),
        len(messages_by_id),
    )
    self._db.commit()
def _importer(self):
    """Instantiate an appropriate OPDSImporter for the given Collection."""
    collection = self.collection
    return OPDSImporter(
        self._db,
        collection,
        data_source_name=collection.data_source.name,
        metadata_client=AuthorNameCanonicalizer(self._db),
    )
def _importer(self):
    # This test partly verifies that we only import the OPDS metadata
    # and never make other HTTP requests or mirror anything: object()
    # isn't really an HTTP client, so any such attempt would crash.
    return OPDSImporter(
        self._db,
        collection=self.collection,
        metadata_client=object(),
        mirror=None,
        http_get=object(),
    )
def __init__(self, _db, collection, lookup=None):
    """Set up the monitor with a metadata-wrangler lookup and importer.

    If no lookup client is supplied, one is built from configuration
    for this collection.
    """
    super(MetadataWranglerCollectionMonitor, self).__init__(_db, collection)
    self.lookup = lookup or MetadataWranglerOPDSLookup.from_config(
        self._db, collection=collection
    )
    self.importer = OPDSImporter(
        self._db,
        self.collection,
        data_source_name=DataSource.METADATA_WRANGLER,
        metadata_client=self.lookup,
        map_from_collection=True,
    )
def run_once(self, start, cutoff):
    """Fetch updates since `start` from the metadata wrangler and import them.

    Skips the run (without keeping the timestamp) if the lookup client
    is unauthenticated or the remote request fails.
    """
    if not self.lookup.authenticated:
        self.keep_timestamp = False
        return
    try:
        response = self.lookup.updates(start)
        self.lookup.check_content_type(response)
    except RemoteIntegrationException as exc:
        self.log.error(
            "Error getting updates for %r: %s",
            self.collection,
            exc.debug_message,
        )
        self.keep_timestamp = False
        return
    feed_importer = OPDSImporter(
        self._db,
        self.collection,
        data_source_name=DataSource.METADATA_WRANGLER,
        metadata_client=self.lookup,
        map_from_collection=True,
    )
    feed_importer.import_from_feed(response.text)