def setUpClass(cls):
    cls.db_helper = DefinitionsDatabaseManager(
        cls.DB_FILE_PATH,
        cls.DB_CREATION_SCRIPT_PATH,
        cls.DEFAULT_DATA_TYPES_SCRIPT_PATH,
        cls.DEFAULT_DATA_SOURCE_TYPES_SCRIPT_PATH,
        cls.DEFAULT_OPERATIONS_SCRIPT_PATH)

    cls.ds_aosp_email = DataSource('Application',
                                   {'package_name': 'com.android.email'})
    cls.ds_facebook = DataSource('Application',
                                 {'package_name': 'com.facebook.katana'})
    cls.ds_aosp_sms = DataSource(
        'Application', {'package_name': 'com.android.providers.telephony'})
    cls.bad_ds = DataSource('Application', {})

    cls.dv_info = DeviceInfo('3.0.0', 'GT-I9300')

    cls.op_info_email_aosp_email = OperationInfo(
        'EmailMessageAOSPEmail', 'EmailMessage', cls.ds_aosp_email,
        ['GT-I9300'], [('2.3.7', '5.1.1')])
    cls.op_info_image_aosp_email = OperationInfo(
        'ImageFileAOSPEmail', 'ImageFile', cls.ds_aosp_email,
        ['GT-I9300'], [('2.3.7', '5.1.1')])
    cls.op_info_image_facebook = OperationInfo(
        'ImageFileFacebook', 'ImageFile', cls.ds_facebook,
        ['GT-I9300', 'XT1053'], [('2.3.7', '5.1.1')])
    cls.op_info_sms_aosp_sms = OperationInfo(
        'SmsMessageAOSPSms', 'SmsMessage', cls.ds_aosp_sms,
        ['GT-I9300', 'LG-D820'], [('2.0', '4.4.4')])
def teardown(self):
    # Close the session.
    self._db.close()

    # Roll back all database changes that happened during this
    # test, whether in the session that was just closed or some
    # other session.
    self.transaction.rollback()

    # Remove any database objects cached in the model classes but
    # associated with the now-rolled-back session.
    Collection.reset_cache()
    ConfigurationSetting.reset_cache()
    DataSource.reset_cache()
    DeliveryMechanism.reset_cache()
    ExternalIntegration.reset_cache()
    Genre.reset_cache()
    Library.reset_cache()

    # Also roll back any record of those changes in the
    # Configuration instance.
    for key in [
            Configuration.SITE_CONFIGURATION_LAST_UPDATE,
            Configuration.LAST_CHECKED_FOR_SITE_CONFIGURATION_UPDATE
    ]:
        if key in Configuration.instance:
            del Configuration.instance[key]

    if self.search_mock:
        self.search_mock.stop()
def setup(self):
    super(TestCoverageProvider, self).setup()
    gutenberg = DataSource.lookup(self._db, DataSource.GUTENBERG)
    self.input_identifier_types = gutenberg.primary_identifier_type
    self.output_source = DataSource.lookup(self._db, DataSource.OCLC)
    self.edition = self._edition(gutenberg.name)
    self.identifier = self.edition.primary_identifier
def run(self):
    self.load_configuration()
    DataSource.well_known_sources(self._db)
    try:
        self.do_run()
    except Exception as e:
        logging.error(
            "Fatal exception while running script: %s", e, exc_info=e)
        raise e
def setup(self):
    super(TestPresentationReadyMonitor, self).setup()
    self.gutenberg = Identifier.GUTENBERG_ID
    self.oclc = DataSource.lookup(self._db, DataSource.OCLC)
    self.overdrive = DataSource.lookup(self._db, DataSource.OVERDRIVE)
    self.edition, self.edition_license_pool = self._edition(
        DataSource.GUTENBERG, with_license_pool=True)
    self.work = self._work(DataSource.GUTENBERG, with_license_pool=True)
    # Don't fake that the work is presentation ready, as we usually do,
    # because presentation readiness is what we're trying to test.
    self.work.presentation_ready = False
def test_cover_image_root(self):
    with self.temp_config():
        gutenberg_illustrated = DataSource.lookup(
            self._db, DataSource.GUTENBERG_COVER_GENERATOR)
        overdrive = DataSource.lookup(self._db, DataSource.OVERDRIVE)

        eq_("http://s3.amazonaws.com/test-book-covers-s3-bucket/Gutenberg%20Illustrated/",
            S3Uploader.cover_image_root(gutenberg_illustrated))
        eq_("http://s3.amazonaws.com/test-book-covers-s3-bucket/Overdrive/",
            S3Uploader.cover_image_root(overdrive))
        eq_("http://s3.amazonaws.com/test-book-covers-s3-bucket/scaled/300/Overdrive/",
            S3Uploader.cover_image_root(overdrive, 300))
def _customlist(self, foreign_identifier=None, name=None,
                data_source_name=DataSource.NYT, num_entries=1,
                entries_exist_as_works=True):
    data_source = DataSource.lookup(self._db, data_source_name)
    foreign_identifier = foreign_identifier or self._str
    now = datetime.utcnow()
    customlist, ignore = get_one_or_create(
        self._db, CustomList,
        create_method_kwargs=dict(
            created=now,
            updated=now,
            name=name or self._str,
            description=self._str,
        ),
        data_source=data_source,
        foreign_identifier=foreign_identifier
    )

    editions = []
    for i in range(num_entries):
        if entries_exist_as_works:
            work = self._work(with_open_access_download=True)
            edition = work.presentation_edition
        else:
            edition = self._edition(data_source_name, title="Item %s" % i)
            edition.permanent_work_id = "Permanent work ID %s" % self._str
        customlist.add_entry(
            edition, "Annotation %s" % i, first_appearance=now)
        editions.append(edition)

    return customlist, editions
def test_staff_picks_and_best_sellers_sublane(self):
    staff_picks, ignore = self._customlist(
        foreign_identifier=u"Staff Picks", name=u"Staff Picks!",
        data_source_name=DataSource.LIBRARY_STAFF,
        num_entries=0)
    best_sellers, ignore = self._customlist(
        foreign_identifier=u"NYT Best Sellers", name=u"Best Sellers!",
        data_source_name=DataSource.NYT,
        num_entries=0)
    lane = Lane(self._db, "Everything",
                include_staff_picks=True, include_best_sellers=True)

    # A staff picks sublane and a best-sellers sublane have been
    # created for us.
    best, picks = lane.sublanes.lanes
    eq_("Best Sellers", best.display_name)
    eq_("Everything - Best Sellers", best.name)
    nyt = DataSource.lookup(self._db, DataSource.NYT)
    eq_(nyt.id, best.list_data_source_id)

    eq_("Staff Picks", picks.display_name)
    eq_("Everything - Staff Picks", picks.name)
    eq_([staff_picks.id], picks.list_ids)
def __init__(self, _db, api, datasource, batch_size=10,
             metadata_replacement_policy=None,
             circulationdata_replacement_policy=None,
             cutoff_time=None):
    self._db = _db
    self.api = api
    output_source = DataSource.lookup(_db, datasource)
    input_identifier_types = [output_source.primary_identifier_type]
    service_name = "%s Bibliographic Coverage Provider" % datasource
    metadata_replacement_policy = (
        metadata_replacement_policy or
        ReplacementPolicy.from_metadata_source())
    circulationdata_replacement_policy = (
        circulationdata_replacement_policy or
        ReplacementPolicy.from_license_source())
    self.metadata_replacement_policy = metadata_replacement_policy
    self.circulationdata_replacement_policy = circulationdata_replacement_policy
    super(BibliographicCoverageProvider, self).__init__(
        service_name, input_identifier_types, output_source,
        batch_size=batch_size, cutoff_time=cutoff_time)
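# Instantiation sketch (hypothetical: the `overdrive_api` object and the choice
# of DataSource.OVERDRIVE are illustrative, not taken from the snippet above).
# The `datasource` name is resolved to the output DataSource, its primary
# identifier type becomes the input identifier type, and the name is folded
# into the service name, e.g. "Overdrive Bibliographic Coverage Provider".
#
#   provider = BibliographicCoverageProvider(
#       _db, overdrive_api, DataSource.OVERDRIVE, batch_size=25)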
def to_customlist(self, _db, dictreader):
    """Turn the CSV file in `dictreader` into a CustomList.

    TODO: Keep track of the list's current members. If any item
    was on the list but is no longer on the list, set its
    last_appeared date to its most recent appearance.
    """
    data_source = DataSource.lookup(_db, self.data_source_name)
    now = datetime.datetime.utcnow()

    # Find or create the CustomList object itself.
    custom_list, was_new = get_one_or_create(
        _db,
        CustomList,
        data_source=data_source,
        foreign_identifier=self.foreign_identifier,
        create_method_kwargs=dict(
            created=now,
        )
    )
    custom_list.updated = now

    # Turn the rows of the CSV file into a sequence of Metadata
    # objects, then turn each Metadata into a CustomListEntry object.
    for metadata in self.to_metadata(dictreader):
        entry = self.metadata_to_list_entry(
            custom_list, data_source, now, metadata)
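# Usage sketch (hypothetical file name and `csv_list` object; assumes this
# method lives on a CSV-backed list class with data_source_name and
# foreign_identifier attributes, as the code above implies):
#
#   import csv
#   with open('best_sellers.csv') as f:
#       csv_list.to_customlist(_db, csv.DictReader(f))
#
# Each CSV row is converted into a Metadata object, which in turn becomes a
# CustomListEntry on the found-or-created CustomList.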
def test_book_url(self):
    identifier = self._identifier(foreign_id="ABOOK")
    buckets = {S3Uploader.OA_CONTENT_BUCKET_KEY: 'thebooks'}
    uploader = self._uploader(**buckets)
    m = uploader.book_url

    eq_(u'https://s3.amazonaws.com/thebooks/Gutenberg+ID/ABOOK.epub',
        m(identifier))

    # The default extension is .epub, but a custom extension can
    # be specified.
    eq_(u'https://s3.amazonaws.com/thebooks/Gutenberg+ID/ABOOK.pdf',
        m(identifier, extension='pdf'))
    eq_(u'https://s3.amazonaws.com/thebooks/Gutenberg+ID/ABOOK.pdf',
        m(identifier, extension='.pdf'))

    # If a data source is provided, the book is stored underneath the
    # data source.
    unglueit = DataSource.lookup(self._db, DataSource.UNGLUE_IT)
    eq_(u'https://s3.amazonaws.com/thebooks/unglue.it/Gutenberg+ID/ABOOK.epub',
        m(identifier, data_source=unglueit))

    # If a title is provided, the book's filename incorporates the
    # title, for the benefit of people who download the book onto
    # their hard drive.
    eq_(u'https://s3.amazonaws.com/thebooks/Gutenberg+ID/ABOOK/On+Books.epub',
        m(identifier, title="On Books"))

    # Non-open-access content can't be stored.
    assert_raises(NotImplementedError, m, identifier, open_access=False)
def test_cover_image_root(self):
    bucket = u'test-book-covers-s3-bucket'
    m = S3Uploader.cover_image_root

    gutenberg_illustrated = DataSource.lookup(
        self._db, DataSource.GUTENBERG_COVER_GENERATOR)
    overdrive = DataSource.lookup(self._db, DataSource.OVERDRIVE)

    eq_("https://s3.amazonaws.com/test-book-covers-s3-bucket/Gutenberg+Illustrated/",
        m(bucket, gutenberg_illustrated))
    eq_("https://s3.amazonaws.com/test-book-covers-s3-bucket/Overdrive/",
        m(bucket, overdrive))
    eq_("https://s3.amazonaws.com/test-book-covers-s3-bucket/scaled/300/Overdrive/",
        m(bucket, overdrive, 300))
def test_mirror_404_error(self):
    mirror = DummyS3Uploader()
    h = DummyHTTPClient()
    h.queue_response(404)
    policy = ReplacementPolicy(mirror=mirror, http_get=h.do_get)

    edition, pool = self._edition(with_license_pool=True)

    data_source = DataSource.lookup(self._db, DataSource.GUTENBERG)

    link = LinkData(
        rel=Hyperlink.IMAGE,
        media_type=Representation.JPEG_MEDIA_TYPE,
        href="http://example.com/",
    )

    link_obj, ignore = edition.primary_identifier.add_link(
        rel=link.rel,
        href=link.href,
        data_source=data_source,
        license_pool=pool,
        media_type=link.media_type,
        content=link.content,
    )

    m = Metadata(data_source=data_source)
    m.mirror_link(edition, data_source, link, link_obj, policy)

    # Since we got a 404 error, the cover image was not mirrored.
    eq_(404, link_obj.resource.representation.status_code)
    eq_(None, link_obj.resource.representation.mirror_url)
    eq_([], mirror.uploaded)
def test_extract_data_from_feedparser_handles_exception(self):
    class DoomedFeedparserOPDSImporter(OPDSImporter):
        """An importer that can't extract metadata from feedparser."""

        @classmethod
        def _data_detail_for_feedparser_entry(cls, entry, data_source):
            raise Exception("Utter failure!")

    data_source = DataSource.lookup(self._db, DataSource.OA_CONTENT_SERVER)

    values, failures = DoomedFeedparserOPDSImporter.extract_data_from_feedparser(
        self.content_server_mini_feed, data_source)

    # No metadata was extracted.
    eq_(0, len(values.keys()))

    # There are 2 failures, both from exceptions. The 202 message
    # found in content_server_mini.opds is not extracted
    # here--it's extracted by extract_metadata_from_elementtree.
    eq_(2, len(failures))

    # The first error message became a CoverageFailure.
    failure = failures[
        'urn:librarysimplified.org/terms/id/Gutenberg%20ID/10441']
    assert isinstance(failure, CoverageFailure)
    eq_(True, failure.transient)
    assert "Utter failure!" in failure.exception

    # The second error message became a CoverageFailure.
    failure = failures[
        'urn:librarysimplified.org/terms/id/Gutenberg%20ID/10557']
    assert isinstance(failure, CoverageFailure)
    eq_(True, failure.transient)
    assert "Utter failure!" in failure.exception
def _edition(self, data_source_name=DataSource.GUTENBERG, identifier_type=Identifier.GUTENBERG_ID, with_license_pool=False, with_open_access_download=False, title=None, language="eng", authors=None, identifier_id=None): id = identifier_id or self._str source = DataSource.lookup(self._db, data_source_name) wr = Edition.for_foreign_id( self._db, source, identifier_type, id)[0] if not title: title = self._str wr.title = unicode(title) if language: wr.language = language if authors is None: authors = self._str if isinstance(authors, basestring): authors = [authors] if authors != []: wr.add_contributor(unicode(authors[0]), Contributor.PRIMARY_AUTHOR_ROLE) wr.author = unicode(authors[0]) for author in authors[1:]: wr.add_contributor(unicode(author), Contributor.AUTHOR_ROLE) if with_license_pool or with_open_access_download: pool = self._licensepool(wr, data_source_name=data_source_name, with_open_access_download=with_open_access_download) pool.set_presentation_edition() return wr, pool return wr
def test_non_open_access_book_not_mirrored(self):
    data_source = DataSource.lookup(self._db, DataSource.GUTENBERG)
    m = Metadata(data_source=data_source)

    mirror = DummyS3Uploader(fail=True)
    h = DummyHTTPClient()
    policy = ReplacementPolicy(mirror=mirror, http_get=h.do_get)

    content = "foo"
    link = LinkData(
        rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
        media_type=Representation.EPUB_MEDIA_TYPE,
        href="http://example.com/",
        content=content,
        rights_uri=RightsStatus.IN_COPYRIGHT)

    identifier = self._identifier()
    link_obj, is_new = identifier.add_link(
        rel=link.rel,
        href=link.href,
        data_source=data_source,
        media_type=link.media_type,
        content=link.content,
    )

    # The Hyperlink object makes it look like an open-access book,
    # but the context we have from the OPDS feed says that it's
    # not.
    m.mirror_link(None, data_source, link, link_obj, policy)

    # No HTTP requests were made.
    eq_([], h.requests)

    # Nothing was uploaded.
    eq_([], mirror.uploaded)
def parse_list_args(self, arg_line):
    """
    :type arg_line: string
    :rtype: (string, DataSource, DeviceInfo)
    """
    dt = None
    ds = None
    di = None

    parser = ArgumentParser()
    parser.add_argument('--data_type', '-dt')
    parser.add_argument('--source_type', '-st')
    parser.add_argument('--source_params', '-sp', nargs='*')
    parser.add_argument('--model', '-m')
    parser.add_argument('--version', '-v')

    args, unknown = parser.parse_known_args(shlex.split(arg_line))

    if args.data_type:
        dt = args.data_type

    if args.source_type:
        ds = DataSource(args.source_type, {})
        if args.source_params:
            params = {}
            for p in args.source_params:
                if ':' not in p:
                    raise ValueError(
                        "The parameter 'source_params' is a whitespace "
                        "separated list of pairs param_name:param_value.")
                p_split = re.search('(.*):(.*)', p)
                params[p_split.group(1)] = p_split.group(2)
            ds.info = params

    if args.model and args.version:
        if not self.android_version_pattern.match(args.version):
            raise ValueError('The Android version is not valid.')
        di = DeviceInfo(args.version, args.model)
        di.os_version = args.version

    return dt, ds, di
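# Illustrative sketch (not part of the original module): the same
# ArgumentParser/shlex combination used above, applied to a hypothetical
# command line, to show the expected option format.
import shlex
from argparse import ArgumentParser

_parser = ArgumentParser()
_parser.add_argument('--data_type', '-dt')
_parser.add_argument('--source_type', '-st')
_parser.add_argument('--source_params', '-sp', nargs='*')
_parser.add_argument('--model', '-m')
_parser.add_argument('--version', '-v')

_args, _unknown = _parser.parse_known_args(shlex.split(
    '-dt EmailMessage -st Application '
    '-sp package_name:com.android.email -m GT-I9300 -v 4.4.4'))
# _args.data_type      -> 'EmailMessage'
# _args.source_type    -> 'Application'
# _args.source_params  -> ['package_name:com.android.email']
# _args.model, _args.version -> 'GT-I9300', '4.4.4'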
def data_source(self): """Look up or create a DataSource object representing the source of this OPDS feed. """ return DataSource.lookup( self._db, self.data_source_name, autocreate=True, offers_licenses=self.data_source_offers_licenses )
def test_mirror_open_access_link_mirror_failure(self):
    edition, pool = self._edition(with_license_pool=True)

    data_source = DataSource.lookup(self._db, DataSource.GUTENBERG)
    m = Metadata(data_source=data_source)

    mirror = DummyS3Uploader(fail=True)
    h = DummyHTTPClient()
    policy = ReplacementPolicy(mirror=mirror, http_get=h.do_get)

    content = open(self.sample_cover_path("test-book-cover.png")).read()
    link = LinkData(
        rel=Hyperlink.IMAGE,
        media_type=Representation.JPEG_MEDIA_TYPE,
        href="http://example.com/",
        content=content)

    link_obj, ignore = edition.primary_identifier.add_link(
        rel=link.rel,
        href=link.href,
        data_source=data_source,
        license_pool=pool,
        media_type=link.media_type,
        content=link.content,
    )

    h.queue_response(200, media_type=Representation.JPEG_MEDIA_TYPE)

    m.mirror_link(edition, data_source, link, link_obj, policy)

    representation = link_obj.resource.representation

    # The representation was fetched successfully.
    eq_(None, representation.fetch_exception)
    assert representation.fetched_at != None

    # But mirroring failed.
    assert representation.mirror_exception != None
    eq_(None, representation.mirrored_at)
    eq_(link.media_type, representation.media_type)
    eq_(link.href, representation.url)

    # The mirror url should still be set.
    assert "Gutenberg" in representation.mirror_url
    assert representation.mirror_url.endswith(
        "%s/cover.jpg" % edition.primary_identifier.identifier)

    # Book content is still there since it wasn't mirrored.
    assert representation.content != None

    # The edition's identifier-associated license pool should not be
    # suppressed just because the image fetch failed.
    eq_(False, pool.suppressed)

    # The license pool's license_exception column is only filled in
    # when the fetch fails on a Hyperlink.OPEN_ACCESS_DOWNLOAD-type epub.
    eq_(None, pool.license_exception)
def _credential(self, data_source_name=DataSource.GUTENBERG, type=None,
                patron=None):
    data_source = DataSource.lookup(self._db, data_source_name)
    type = type or self._str
    patron = patron or self._patron()
    credential, is_new = Credential.persistent_token_create(
        self._db, data_source, type, patron
    )
    return credential
def data_source(self): """Look up the DataSource object corresponding to the service we're running this data through. Out of an excess of caution, we look up the DataSource every time, rather than storing it, in case a CoverageProvider is ever used in an environment where the database session is scoped (e.g. the circulation manager). """ return DataSource.lookup(self._db, self.DATA_SOURCE_NAME)
def test_mirror_open_access_link_mirror_failure(self):
    mirror = DummyS3Uploader(fail=True)
    h = DummyHTTPClient()

    edition, pool = self._edition(with_license_pool=True)

    data_source = DataSource.lookup(self._db, DataSource.GUTENBERG)
    policy = ReplacementPolicy(mirror=mirror, http_get=h.do_get)

    circulation_data = CirculationData(
        data_source=edition.data_source,
        primary_identifier=edition.primary_identifier,
    )

    link = LinkData(
        rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
        media_type=Representation.EPUB_MEDIA_TYPE,
        href=self._url,
    )

    link_obj, ignore = edition.primary_identifier.add_link(
        rel=link.rel,
        href=link.href,
        data_source=data_source,
        license_pool=pool,
        media_type=link.media_type,
        content=link.content,
    )

    h.queue_response(200, media_type=Representation.EPUB_MEDIA_TYPE)

    circulation_data.mirror_link(pool, data_source, link, link_obj, policy)

    representation = link_obj.resource.representation

    # The representation was fetched successfully.
    eq_(None, representation.fetch_exception)
    assert representation.fetched_at != None

    # But mirroring failed.
    assert representation.mirror_exception != None
    eq_(None, representation.mirrored_at)
    eq_(link.media_type, representation.media_type)
    eq_(link.href, representation.url)

    # The mirror url should still be set.
    assert "Gutenberg" in representation.mirror_url
    assert representation.mirror_url.endswith("%s.epub" % edition.title)

    # Book content is still there since it wasn't mirrored.
    assert representation.content != None

    # The license pool is suppressed when mirroring fails.
    eq_(True, pool.suppressed)
    assert representation.mirror_exception in pool.license_exception
def test_mirror_with_content_modifier(self):
    edition, pool = self._edition(with_license_pool=True)

    data_source = DataSource.lookup(self._db, DataSource.GUTENBERG)
    m = Metadata(data_source=data_source)

    mirror = DummyS3Uploader()

    def dummy_content_modifier(representation):
        representation.content = "Replaced Content"

    h = DummyHTTPClient()
    policy = ReplacementPolicy(
        mirror=mirror, content_modifier=dummy_content_modifier,
        http_get=h.do_get)

    link = LinkData(
        rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
        media_type=Representation.EPUB_MEDIA_TYPE,
        href="http://example.com/test.epub",
        content="I'm an epub",
    )

    link_obj, ignore = edition.primary_identifier.add_link(
        rel=link.rel,
        href=link.href,
        data_source=data_source,
        license_pool=pool,
        media_type=link.media_type,
        content=link.content,
    )

    h.queue_response(200, media_type=Representation.EPUB_MEDIA_TYPE)

    m.mirror_link(edition, data_source, link, link_obj, policy)

    representation = link_obj.resource.representation

    # The representation was fetched successfully.
    eq_(None, representation.fetch_exception)
    assert representation.fetched_at != None

    # The mirror url is set.
    assert "Gutenberg" in representation.mirror_url
    assert representation.mirror_url.endswith(
        "%s/%s.epub" % (edition.primary_identifier.identifier, edition.title))

    # Content isn't there since it was mirrored.
    eq_(None, representation.content)

    # The representation was mirrored, with the modified content.
    eq_([representation], mirror.uploaded)
    eq_(["Replaced Content"], mirror.content)
def test_cover_image_url(self):
    identifier = self._identifier(foreign_id="ABOOK")
    buckets = {S3Uploader.BOOK_COVERS_BUCKET_KEY: 'thecovers'}
    uploader = self._uploader(**buckets)
    m = uploader.cover_image_url

    unglueit = DataSource.lookup(self._db, DataSource.UNGLUE_IT)
    identifier = self._identifier(foreign_id="ABOOK")
    eq_(u'https://s3.amazonaws.com/thecovers/scaled/601/unglue.it/Gutenberg+ID/ABOOK/filename',
        m(unglueit, identifier, "filename", scaled_size=601))
def _licensepool(self, edition, open_access=True,
                 data_source_name=DataSource.GUTENBERG,
                 with_open_access_download=False,
                 set_edition_as_presentation=False,
                 collection=None):
    source = DataSource.lookup(self._db, data_source_name)
    if not edition:
        edition = self._edition(data_source_name)
    collection = collection or self._default_collection
    pool, ignore = get_one_or_create(
        self._db, LicensePool,
        create_method_kwargs=dict(open_access=open_access),
        identifier=edition.primary_identifier,
        data_source=source,
        collection=collection,
        availability_time=datetime.utcnow()
    )

    if set_edition_as_presentation:
        pool.presentation_edition = edition

    if with_open_access_download:
        pool.open_access = True
        url = "http://foo.com/" + self._str
        media_type = MediaTypes.EPUB_MEDIA_TYPE
        link, new = pool.identifier.add_link(
            Hyperlink.OPEN_ACCESS_DOWNLOAD, url, source, media_type
        )

        # Add a DeliveryMechanism for this download.
        pool.set_delivery_mechanism(
            media_type,
            DeliveryMechanism.NO_DRM,
            RightsStatus.GENERIC_OPEN_ACCESS,
            link.resource,
        )

        representation, is_new = self._representation(
            url, media_type, "Dummy content", mirrored=True)
        link.resource.representation = representation
    else:
        # Add a DeliveryMechanism for this license pool.
        pool.set_delivery_mechanism(
            MediaTypes.EPUB_MEDIA_TYPE,
            DeliveryMechanism.ADOBE_DRM,
            RightsStatus.UNKNOWN,
            None
        )
        pool.licenses_owned = pool.licenses_available = 1

    return pool
def get_operation_info_by_id(self, id_):
    """
    :param id_: int
    :rtype: OperationInfo
    """
    with sqlite3.connect(self.db_file_path) as conn:
        conn.row_factory = sqlite3.Row
        c = conn.cursor()

        c.execute(
            """
            SELECT dt.name AS dt_name, dst.name AS dst_name, o.name AS o_name
            FROM operations AS o, data_types AS dt, data_source_types AS dst
            WHERE o.data_type_id = dt.id AND o.data_source_type_id = dst.id
                AND o.id = ?
            """, [id_])

        res = c.fetchone()
        data_type = res['dt_name']
        data_source_type = res['dst_name']
        op_name = res['o_name']

        c.execute(
            """
            SELECT param_name, param_value
            FROM data_source_params_values dspv
            WHERE dspv.operation_id = ?
            """, [id_])

        param_values = {}
        for pv in c:
            param_values[pv['param_name']] = pv['param_value']

        c.execute(
            'SELECT model_number FROM device_models WHERE operation_id = ?',
            [id_])

        supported_models = []
        for dm in c:
            supported_models.append(dm['model_number'])

        c.execute(
            'SELECT from_version, to_version FROM android_versions '
            'WHERE operation_id = ?', [id_])

        supported_os_versions = []
        for av in c:
            supported_os_versions.append(
                (av['from_version'], av['to_version']))

        c.close()

        return OperationInfo(op_name, data_type,
                             DataSource(data_source_type, param_values),
                             supported_models, supported_os_versions)
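# Usage sketch (hypothetical id value; assumes the DefinitionsDatabaseManager
# configured in the setUpClass snippet above, and that OperationInfo exposes
# its constructor arguments as attributes -- an assumption, not confirmed by
# the source):
#
#   op_info = db_helper.get_operation_info_by_id(1)
#   # Expected shape, mirroring the constructor call above:
#   # OperationInfo(name, data_type, DataSource(source_type, params),
#   #               supported_models, supported_os_versions)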
def test_classifications_from_another_source_not_updated(self):
    # Set up an edition whose primary identifier has two
    # classifications.
    source1 = DataSource.lookup(self._db, DataSource.AXIS_360)
    source2 = DataSource.lookup(self._db, DataSource.METADATA_WRANGLER)
    edition = self._edition()
    identifier = edition.primary_identifier
    c1 = identifier.classify(source1, Subject.TAG, "i will persist")
    c2 = identifier.classify(source2, Subject.TAG, "i will perish")

    # Now we get some new metadata from source #2.
    subjects = [SubjectData(type=Subject.TAG, identifier="i will conquer")]
    metadata = Metadata(subjects=subjects, data_source=source2)

    replace = ReplacementPolicy(subjects=True)
    metadata.apply(edition, replace=replace)

    # The old classification from source #2 has been destroyed.
    # The old classification from source #1 is still there.
    eq_(['i will conquer', 'i will persist'],
        sorted([x.subject.identifier for x in identifier.classifications]))
def test_import_one_feed(self):
    # Check coverage records are created.
    monitor = OPDSImportMonitor(self._db, "http://url",
                                DataSource.OA_CONTENT_SERVER,
                                DoomedOPDSImporter)
    data_source = DataSource.lookup(self._db, DataSource.OA_CONTENT_SERVER)

    feed = self.content_server_mini_feed

    monitor.import_one_feed(feed, "http://root-url/")

    editions = self._db.query(Edition).all()

    # One edition has been imported.
    eq_(1, len(editions))
    [edition] = editions

    # That edition has a CoverageRecord.
    record = CoverageRecord.lookup(
        editions[0].primary_identifier, data_source,
        operation=CoverageRecord.IMPORT_OPERATION)
    eq_(CoverageRecord.SUCCESS, record.status)
    eq_(None, record.exception)

    # The edition's primary identifier has a cover link whose
    # relative URL has been resolved relative to the URL we passed
    # into import_one_feed.
    [cover] = [
        x.resource.url for x in editions[0].primary_identifier.links
        if x.rel == Hyperlink.IMAGE
    ]
    eq_("http://root-url/full-cover-image.png", cover)

    # The 202 status message in the feed caused a transient failure.
    # The exception caused a persistent failure.
    coverage_records = self._db.query(CoverageRecord).filter(
        CoverageRecord.operation == CoverageRecord.IMPORT_OPERATION,
        CoverageRecord.status != CoverageRecord.SUCCESS)
    eq_(
        sorted([CoverageRecord.TRANSIENT_FAILURE,
                CoverageRecord.PERSISTENT_FAILURE]),
        sorted([x.status for x in coverage_records]))

    identifier, ignore = Identifier.parse_urn(
        self._db, "urn:librarysimplified.org/terms/id/Gutenberg%20ID/10441")
    failure = CoverageRecord.lookup(
        identifier, data_source,
        operation=CoverageRecord.IMPORT_OPERATION)
    assert "Utter failure!" in failure.exception
def __init__(self, manager_class, data_source_name, list_identifier,
             list_name, primary_language, description, **manager_kwargs):
    data_source = DataSource.lookup(self._db, data_source_name)
    self.custom_list, is_new = get_one_or_create(
        self._db, CustomList,
        data_source_id=data_source.id,
        foreign_identifier=list_identifier,
    )
    self.custom_list.primary_language = primary_language
    self.custom_list.description = description
    self.membership_manager = manager_class(
        self.custom_list, **manager_kwargs)
def test_parse_list_as_identifiers_with_data_source(self):
    lp1 = self._licensepool(None, data_source_name=DataSource.UNGLUE_IT)
    lp2 = self._licensepool(None, data_source_name=DataSource.FEEDBOOKS)
    lp3 = self._licensepool(None, data_source_name=DataSource.FEEDBOOKS)

    i1, i2, i3 = [lp.identifier for lp in [lp1, lp2, lp3]]
    i1.type = i2.type = Identifier.URI
    source = DataSource.lookup(self._db, DataSource.FEEDBOOKS)

    # Only URIs with a FeedBooks LicensePool are selected.
    identifiers = IdentifierInputScript.parse_identifier_list(
        self._db, Identifier.URI, source, [])
    eq_([i2], identifiers)
def to_customlist(self, _db):
    """Turn this NYTBestSeller list into a CustomList object."""
    data_source = DataSource.lookup(_db, DataSource.NYT)
    l, was_new = get_one_or_create(
        _db,
        CustomList,
        data_source=data_source,
        foreign_identifier=self.foreign_identifier,
        create_method_kwargs=dict(created=self.created),
    )
    l.name = self.name
    l.updated = self.updated
    self.update_custom_list(l)
    return l
def _edition(self, data_source_name=DataSource.GUTENBERG, identifier_type=Identifier.GUTENBERG_ID, with_license_pool=False, with_open_access_download=False, title=None, language="eng", authors=None, identifier_id=None, series=None, collection=None, publicationDate=None ): id = identifier_id or self._str source = DataSource.lookup(self._db, data_source_name) wr = Edition.for_foreign_id( self._db, source, identifier_type, id)[0] if not title: title = self._str wr.title = unicode(title) wr.medium = Edition.BOOK_MEDIUM if series: wr.series = series if language: wr.language = language if authors is None: authors = self._str if isinstance(authors, basestring): authors = [authors] if authors != []: wr.add_contributor(unicode(authors[0]), Contributor.PRIMARY_AUTHOR_ROLE) wr.author = unicode(authors[0]) for author in authors[1:]: wr.add_contributor(unicode(author), Contributor.AUTHOR_ROLE) if publicationDate: wr.published = publicationDate if with_license_pool or with_open_access_download: pool = self._licensepool( wr, data_source_name=data_source_name, with_open_access_download=with_open_access_download, collection=collection ) pool.set_presentation_edition() return wr, pool return wr
import os
import site
import sys
from nose.tools import set_trace

d = os.path.split(__file__)[0]
site.addsitedir(os.path.join(d, ".."))

from model import DataSource, LicensePool, SessionManager, Work, Identifier
from model import production_session

if __name__ == "__main__":
    session = production_session()
    data_source_name = sys.argv[1]
    identifier = sys.argv[2]
    data_source = DataSource.lookup(session, data_source_name)
    wid, ignore = Identifier.for_foreign_id(
        session, data_source.primary_identifier_type, identifier, False)
    pool = (
        session.query(LicensePool)
        .filter(LicensePool.data_source == data_source)
        .filter(LicensePool.identifier == wid)
        .one()
    )
    primary_edition = pool.edition()
    old_work = primary_edition.work
    if old_work:
        old_work.license_pools.remove(pool)
    primary_edition.work = None
    pool.calculate_work()
    work = pool.work
    work.calculate_presentation()
import os
import site
from nose.tools import set_trace

d = os.path.split(__file__)[0]
site.addsitedir(os.path.join(d, ".."))

from model import (
    Edition,
    production_session,
    DataSource,
    Work,
)
from sqlalchemy.orm import joinedload

a = 0
db = production_session()
start = 0
batch_size = 1000
source = DataSource.lookup(db, DataSource.THREEM)
base_query = db.query(Work).join(Work.primary_edition).filter(
    Edition.data_source == source).order_by(Work.id).options(
        joinedload('summary'),
        joinedload('primary_edition', 'cover')).limit(batch_size)
batch = base_query.offset(start).all()
while batch:
    for work in batch:
        if not work.primary_edition:
            continue
        if work.primary_edition.cover:
            work.primary_edition.set_cover(work.primary_edition.cover)
            print work.primary_edition.cover_thumbnail_url
        else:
            print "!COVER %s" % work.primary_edition.primary_identifier
        if work.summary:
            work.set_summary(work.summary)
            print work.summary.content[:70]
        genre = classification.genre.name
        genredata = classifier.genres[genre]
        parentage = [x.name for x in genredata.parents] + [genre]
        parentage.reverse()
        while len(parentage) < 3:
            parentage.append("")
        stats[tuple(parentage)][source] += 1
    return stats

if __name__ == '__main__':
    _db = production_session()
    out = csv.writer(sys.stdout)
    sources = [DataSource.lookup(_db, x) for x in [
        DataSource.GUTENBERG, DataSource.OVERDRIVE, DataSource.THREEM]]
    out.writerow(["Classification", "Parent", "Grandparent"]
                 + [x.name for x in sources] + ["Total"])

    for audience in "Adult", "Young Adult", "Children":
        base_query = _db.query(Work).filter(Work.audience == audience)
        by_source = count_for_each_data_source(base_query, sources)
        row = [by_source[source] for source in sources]
        row += [sum(row)]
        row = [audience, "", ""] + row
        out.writerow(row)
    out.writerow([])

    for fiction, name in (True, "Fiction"), (False, "Nonfiction"), (None, "No Fiction Status"):
        base_query = _db.query(Work).filter(Work.fiction == fiction)
    modified = datetime.datetime.fromtimestamp(os.stat(path).st_mtime)
    data = open(path).read()
    representation, ignore = get_one_or_create(
        db, Representation, url=url, data_source=data_source)
    representation.status_code = 200
    representation.content = data
    representation.media_type = 'application/xml'
    representation.fetched_at = modified
    print url

if __name__ == '__main__':
    data_dir = sys.argv[1]
    template = "http://cloudlibraryapi.3m.com/cirrus/library/a4tmf/data/cloudevents?startdate=%s&enddate=%s"
    db = production_session()
    threem = DataSource.lookup(db, DataSource.THREEM)
    cache_path = os.path.join(data_dir, "3M", "cache", "events")
    a = 0
    for filename in os.listdir(cache_path):
        path = os.path.join(cache_path, filename)
        start_date = filename[:19]
        end_date = filename[20:]
        url = template % (start_date, end_date)
        imp(db, threem, path, url)
        a += 1
        if not a % 10:
            db.commit()
    db.commit()
import json
import gzip
import sys

from model import (
    CirculationEvent,
    DataSource,
    CoverageRecord,
    production_session,
    Identifier,
    Measurement,
    LicensePool,
)

database = production_session()
data_dir = sys.argv[1]
OVERDRIVE = DataSource.lookup(database, DataSource.OVERDRIVE)

TIME_FORMAT = "%Y-%m-%dT%H:%M:%S+00:00"

def process_item(_db, item):
    overdrive_id = item['id']
    event_name = item['event']
    old_value = item.get('old_value', 0)
    new_value = item.get('new_value', 0)
    if event_name in ('check_out', 'check_in'):
        x = new_value
        new_value = old_value
        old_value = x
    elif event_name in ('hold_release', 'hold_place', 'license_remove'):
        pass
    representation.status_code = status_code
    representation.content = data
    representation.location = location
    representation.media_type = media_type
    representation.fetched_at = modified
    return True

if __name__ == '__main__':
    data_dir = sys.argv[1]
    db = production_session()
    oclc = OCLCLinkedData(db)
    d = os.path.join(data_dir, "OCLC Linked Data", "cache", "OCLC Number")
    cache = FakeCache(d, 4, False)
    source = DataSource.lookup(db, DataSource.OCLC_LINKED_DATA)
    min_oclc = 1284796
    max_oclc = 2052405
    batch_size = 10000
    type = Identifier.OCLC_NUMBER
    cursor = min_oclc
    while cursor < max_oclc:
        first_time = time.time()
        processed = 0
        max_batch = cursor + batch_size
        q = db.query(Identifier).filter(
            Identifier.type == Identifier.OCLC_NUMBER).filter(
                Identifier.id >= cursor).filter(Identifier.id < max_batch)
        for identifier in q:
            if imp(db, source, identifier, cache):
                processed += 1
    fn = cache._filename(fn)
    modified = datetime.datetime.fromtimestamp(os.stat(fn).st_mtime)
    data = cache.open(fn).read()
    a = dict(collection_token=library['collectionToken'], item_id=i)
    url = OverdriveAPI.METADATA_ENDPOINT % a
    representation, ignore = get_one_or_create(
        db, Representation, url=url, data_source=data_source,
        identifier=identifier)
    representation.status_code = 200
    representation.content = data
    representation.media_type = 'application/json'
    representation.fetched_at = modified
    print identifier

if __name__ == '__main__':
    data_dir = sys.argv[1]
    overdrive = OverdriveAPI(data_dir)
    library = overdrive.get_library()
    db = production_session()
    b = overdrive.bibliographic_cache
    source = DataSource.lookup(db, DataSource.OVERDRIVE)
    q = db.query(Identifier).filter(
        Identifier.type == Identifier.OVERDRIVE_ID)
    a = 0
    for i in q:
        imp(db, source, i, b, library)
        a += 1
        if not a % 1000:
            db.commit()
def __init__(self, db):
    self._db = db
    self.overdrive = DataSource.lookup(self._db, DataSource.OVERDRIVE)