def _set_scalar_value(self, json_content, ast_object):
    """Parse a scalar string value and initialize an object's property with it.

    The scalar form is accepted only when the object's class declares exactly
    one required property: that property receives the scalar value.

    :param json_content: Scalar string value containing a required object's property
    :type json_content: str

    :param ast_object: AST object
    :type ast_object: Node

    :raise BaseSyntaxError: If the object's class does not declare exactly one required property
    """
    required_object_properties = PropertiesGrouping.get_required_class_properties(
        ast_object.__class__
    )

    if len(required_object_properties) != 1:
        # The closing parenthesis after {2} was missing in the original message.
        raise BaseSyntaxError(
            u"There are {0} required properties in {1} but only a single value ({2}) was provided".format(
                len(required_object_properties), encode(ast_object), json_content
            )
        )

    required_object_property_name, required_object_property = first_or_default(
        required_object_properties
    )

    self._set_property_value(
        ast_object,
        required_object_property_name,
        required_object_property,
        json_content,
    )

    # We need to initialize other properties with default values.
    # NOTE(review): the set passed last presumably lists the property names
    # that must be skipped because they are already set -- confirm against
    # _set_non_scalar_value.
    self._set_non_scalar_value(None, ast_object, {required_object_property_name})
def test_syntax_analyzer_raises_missing_property_error_correctly(
    self,
    _,
    rwpm_manifest_content,
    expected_class_with_missing_property,
    expected_missing_property,
):
    """Check the node class and property key reported by the first recorded error.

    :param rwpm_manifest_content: Manifest document as a JSON string
    :param expected_class_with_missing_property: Expected class of the error's node
    :param expected_missing_property: Expected key of the error's node property
    """
    # Arrange
    analyzer = RWPMSyntaxAnalyzer()
    manifest_json = ManifestParser.get_manifest_json(
        six.StringIO(rwpm_manifest_content)
    )

    # Act
    analyzer.analyze(manifest_json)

    # Assert
    first_error = first_or_default(analyzer.context.errors)

    self.assertIsNotNone(first_error)
    self.assertEqual(
        expected_class_with_missing_property, first_error.node.__class__
    )
    self.assertEqual(expected_missing_property, first_error.node_property.key)
def _extract_medium_from_links(self, links): """Extract the publication's medium from its links. :param links: List of links :type links: ast_core.LinkList :return: Publication's medium :rtype: Optional[str] """ derived = None for link in links: if not link.rels or not link.type or not self._is_acquisition_link( link): continue link_media_type, _ = first_or_default( self._extract_media_types_and_drm_scheme_from_link(link), default=(None, None), ) derived = Edition.medium_from_media_type(link_media_type) if derived: break return derived
def _check_manifest_self_link(self, node):
    """Verify that the manifest has a self link with a well-formed href.

    Every link lacking relations is reported inside ``_record_errors``; a
    missing self link or an unparsable self href is raised directly.

    :param node: Manifest-like node
    :type node: Manifestlike
    """
    links_without_rels = (link for link in node.links if not link.rels)

    for link in links_without_rels:
        # NOTE(review): _record_errors presumably captures the error so that
        # the remaining links are still checked -- confirm.
        with self._record_errors():
            raise MANIFEST_LINK_MISSING_REL_PROPERTY_ERROR(
                node=link, node_property=Link.rels
            )

    self_links = node.links.get_by_rel(LinkRelationsRegistry.SELF.key)
    self_link = first_or_default(self_links)

    if self_link is None:
        raise MANIFEST_MISSING_SELF_LINK_ERROR(node=node, node_property=None)

    uri_parser = URIParser()

    try:
        uri_parser.parse(self_link.href)
    except ValueParserError:
        raise MANIFEST_SELF_LINK_WRONG_HREF_FORMAT_ERROR(
            node=self_link, node_property=Link.href
        )
def visit(self, node):
    """Run semantic checks on a manifest node.

    Visits the metadata and links, then requires every link to have
    relations and the manifest to carry a self link with a parsable href,
    and finally visits the sub-collections.

    :param node: Manifest-like node
    :type node: Manifestlike
    """
    self._logger.debug(u"Started processing {0}".format(encode(node)))

    node.metadata.accept(self)
    node.links.accept(self)

    # Stops at the first link that has no relations.
    if any(not link.rels for link in node.links):
        raise MISSING_MANIFEST_LINK_REL_PROPERTY_ERROR

    self_links = node.links.get_by_rel(RWPMLinkRelationsRegistry.SELF.key)
    self_link = first_or_default(self_links)

    if self_link is None:
        raise MISSING_SELF_LINK_ERROR

    uri_parser = URIParser()

    try:
        uri_parser.parse(self_link.href)
    except ValueParsingError:
        raise WRONG_SELF_LINK_HREF_FORMAT

    node.sub_collections.accept(self)

    self._logger.debug(u"Finished processing {0}".format(encode(node)))
def test_first_or_default(self, _, collection, expected_result, default_value=None):
    """Check that first_or_default returns the expected result for the given collection and default."""
    eq_(first_or_default(collection, default_value), expected_result)
def test_dpla_feed(self, feed_name, feed_url, feed_encoding="utf-8", feed_auth=None):
    """Ensure that the ODL 2.x parser correctly parses real production feeds.

    The feed is walked page by page through its "next" links until a page
    without one is reached.

    :param feed_name: Feed's name
    :type feed_name: str

    :param feed_url: Feed's URL
    :type feed_url: str

    :param feed_encoding: Feed's encoding
    :type feed_encoding: str

    :param feed_auth: Feed's authentication information
    :type feed_auth: requests.auth.AuthBase
    """
    # Arrange
    # NOTE: Using logging.basicConfig doesn't work because there are no associated handlers,
    # so we have to set the root's level manually
    logging.root.level = logging.WARNING

    parser = ODLFeedParserFactory().create()

    # Act
    while True:
        try:
            page_result = parser.parse_url(feed_url, feed_encoding, auth=feed_auth)
        except Exception:
            logging.exception(
                "Unexpected exception occurred during parsing {0}".format(feed_name)
            )
            raise

        # Assert
        self.assertIsInstance(page_result, ManifestParserResult)
        self._print_errors(feed_name, feed_url, page_result)

        next_page_link = first_or_default(page_result.root.links.get_by_rel("next"))

        if not next_page_link:
            break

        feed_url = next_page_link.href
def visit(self, node):  # pylint: disable=E0102
    """Perform semantic analysis of the ODL license node.

    Two errors can be reported, each inside ``_record_errors``: one when
    there is no self link typed as an ODL License Info Document, and one when
    there is no borrow link typed as an ODL License Status Document.

    :param node: ODLLicense node
    :type node: ODLLicense
    """
    self_link = None
    if node.links:
        self_link = first_or_default(
            node.links.get_by_rel(OPDS2LinkRelationsRegistry.SELF.key)
        )

    if (
        not self_link
        or self_link.type != ODLMediaTypesRegistry.ODL_LICENSE_INFO_DOCUMENT.key
    ):
        with self._record_errors():
            raise ODL_LICENSE_MUST_CONTAIN_SELF_LINK_TO_LICENSE_INFO_DOCUMENT_ERROR(
                node=node, node_property=None
            )

    borrow_link = None
    if node.links:
        borrow_link = first_or_default(
            node.links.get_by_rel(OPDS2LinkRelationsRegistry.BORROW.key)
        )

    if (
        not borrow_link
        or borrow_link.type != ODLMediaTypesRegistry.ODL_LICENSE_STATUS_DOCUMENT.key
    ):
        with self._record_errors():
            raise ODL_LICENSE_MUST_CONTAIN_CHECKOUT_LINK_TO_LICENSE_STATUS_DOCUMENT_ERROR(
                node=node, node_property=None
            )
def test_syntax_analyzer_raises_value_parsing_error_when_property_has_incorrect_value(
    self, _, rwpm_manifest_content, expected_error_message
):
    """Check the text of the first error recorded for a manifest with an invalid property value.

    :param rwpm_manifest_content: Manifest document as a JSON string
    :param expected_error_message: Expected text of the first recorded error
    """
    # Arrange
    analyzer = RWPMSyntaxAnalyzer()
    manifest_json = ManifestParser.get_manifest_json(
        six.StringIO(rwpm_manifest_content)
    )

    # Act
    analyzer.analyze(manifest_json)

    # Assert
    first_error = first_or_default(analyzer.context.errors)

    self.assertIsNotNone(first_error)

    # Leading/trailing "u" characters are stripped from the message before comparing.
    actual_message = six.text_type(first_error).strip("u")
    self.assertEqual(expected_error_message, actual_message)
def _extract_media_types_and_drm_scheme_from_link(self, link):
    """Extract information about content's media types and used DRM schemes from the link.

    For a link with properties, one pair is produced per indirect acquisition
    object, but only when the link has no availability information or is
    marked as available. For a link without properties, a single DRM-free
    pair is produced when the link's own type is a known book or audiobook
    media type.

    :param link: Link object
    :type link: ast_core.Link

    :return: List of 2-tuples, each containing a content media type and its DRM scheme
    :rtype: List[Tuple[str, str]]
    """
    self._logger.debug(
        "Started extracting media types and a DRM scheme from {0}".format(
            encode(link)
        )
    )

    media_types_and_drm_scheme = []

    if link.properties:
        availability = link.properties.availability

        if (
            not availability
            or availability.state == opds2_ast.OPDS2AvailabilityType.AVAILABLE.value
        ):
            for acquisition_object in link.properties.indirect_acquisition:
                nested_acquisition_object = acquisition_object

                # Walk down to the innermost acquisition object. Each step
                # must follow the CURRENT object's child: re-reading the
                # top-level object's child (as the code did before) never
                # gets past the first level and loops forever when that
                # first child has children of its own.
                while nested_acquisition_object.child:
                    nested_acquisition_object = first_or_default(
                        nested_acquisition_object.child
                    )

                # The outermost type names the DRM scheme when it is a known one.
                drm_scheme = (
                    acquisition_object.type
                    if acquisition_object.type in DeliveryMechanism.KNOWN_DRM_TYPES
                    else DeliveryMechanism.NO_DRM
                )

                media_types_and_drm_scheme.append(
                    (nested_acquisition_object.type, drm_scheme)
                )
    else:
        if (
            link.type in MediaTypes.BOOK_MEDIA_TYPES
            or link.type in MediaTypes.AUDIOBOOK_MEDIA_TYPES
        ):
            media_types_and_drm_scheme.append((link.type, DeliveryMechanism.NO_DRM))

    self._logger.debug(
        "Finished extracting media types and a DRM scheme from {0}: {1}".format(
            encode(link), encode(media_types_and_drm_scheme)
        )
    )

    return media_types_and_drm_scheme
def _extract_link(self, link, feed_self_url, default_link_rel=None):
    """Build a LinkData object from a webpub-manifest-parser link.

    The first relation of the link is used, falling back to
    ``default_link_rel``. An href without a network location is resolved
    against ``feed_self_url`` when one is given.

    :param link: webpub-manifest-parser's link
    :type link: ast_core.Link

    :param feed_self_url: Feed's self URL
    :type feed_self_url: str

    :param default_link_rel: Default link's relation
    :type default_link_rel: Optional[str]

    :return: Link metadata
    :rtype: LinkData
    """
    self._logger.debug(
        "Started extracting link metadata from {0}".format(encode(link))
    )

    # FIXME: It seems that OPDS 2.0 spec doesn't contain information about rights so we use the default one.
    default_rights_uri = RightsStatus.rights_uri_from_string("")
    link_rel = first_or_default(link.rels, default_link_rel)
    link_type = link.type
    link_href = link.href

    if feed_self_url and not urlparse(link_href).netloc:
        # This link is relative, so we need to get the absolute url
        link_href = urljoin(feed_self_url, link_href)

    link_data = LinkData(
        rel=link_rel,
        href=link_href,
        media_type=link_type,
        rights_uri=default_rights_uri,
        content=None,
    )

    self._logger.debug(
        "Finished extracting link metadata from {0}: {1}".format(
            encode(link), encode(link_data)
        )
    )

    return link_data
def test(self):
    """Parse the sample ODL feed file and check its single publication's license."""
    # Arrange
    parser = ODLFeedParserFactory().create()
    feed_file_path = os.path.join(
        os.path.dirname(__file__), "../../files/odl/feed.json"
    )

    # Act
    parse_result = parser.parse_file(feed_file_path)

    # Assert
    self.assertIsInstance(parse_result, ManifestParserResult)
    self.assertEqual(0, len(parse_result.errors))

    feed = parse_result.root
    self.assertIsInstance(feed.metadata, OPDS2FeedMetadata)
    self.assertEqual("Test", feed.metadata.title)

    self.assertEqual(1, len(feed.publications))
    [publication] = feed.publications

    self.assertEqual(1, len(publication.licenses))
    [odl_license] = publication.licenses

    # License metadata
    self.assertEqual(
        "urn:uuid:f7847120-fc6f-11e3-8158-56847afe9799",
        odl_license.metadata.identifier,
    )
    self.assertEqual(["application/epub+zip"], odl_license.metadata.formats)
    self.assertEqual("USD", odl_license.metadata.price.currency)
    self.assertEqual(7.99, odl_license.metadata.price.value)
    self.assertEqual(
        datetime.datetime(2014, 4, 25, 12, 25, 21, tzinfo=tzoffset(None, 7200)),
        odl_license.metadata.created,
    )

    # License terms
    self.assertEqual(30, odl_license.metadata.terms.checkouts)
    self.assertEqual(
        datetime.datetime(2016, 4, 25, 12, 25, 21, tzinfo=tzoffset(None, 7200)),
        odl_license.metadata.terms.expires,
    )
    self.assertEqual(10, odl_license.metadata.terms.concurrency)
    self.assertEqual(5097600, odl_license.metadata.terms.length)

    # License protection
    self.assertEqual(
        [
            u"application/vnd.adobe.adept+xml",
            u"application/vnd.readium.lcp.license.v1.0+json",
        ],
        odl_license.metadata.protection.formats,
    )
    self.assertEqual(6, odl_license.metadata.protection.devices)
    self.assertEqual(False, odl_license.metadata.protection.copy_allowed)
    self.assertEqual(False, odl_license.metadata.protection.print_allowed)
    self.assertEqual(False, odl_license.metadata.protection.tts_allowed)

    # License links
    self.assertEqual(2, len(odl_license.links))

    checkout_link = first_or_default(
        odl_license.links.get_by_rel(OPDS2LinkRelationsRegistry.BORROW.key)
    )
    self.assertEqual(
        "application/vnd.readium.license.status.v1.0+json", checkout_link.type
    )

    info_link = first_or_default(
        odl_license.links.get_by_rel(OPDS2LinkRelationsRegistry.SELF.key)
    )
    self.assertEqual("application/vnd.odl.info+json", info_link.type)
def test(self):
    """Parse the sample OPDS 2.0 feed file and check the feed and both of its publications."""
    # Arrange
    parser = OPDS2FeedParserFactory().create()
    feed_file_path = os.path.join(
        os.path.dirname(__file__), "../../files/opds2/feed.json"
    )

    # Act
    parse_result = parser.parse_file(feed_file_path)

    # Assert
    self.assertIsInstance(parse_result, ManifestParserResult)
    self.assertEqual(0, len(parse_result.errors))

    feed = parse_result.root
    self.assertIsInstance(feed, OPDS2Feed)

    self.assertIsInstance(feed.metadata, OPDS2FeedMetadata)
    self.assertEqual("Example listing publications", feed.metadata.title)

    # Feed links
    self.assertIsInstance(feed.links, list)
    self.assertEqual(1, len(feed.links))
    [feed_link] = feed.links
    self.assertEqual(OPDS2LinkRelationsRegistry.SELF.key, feed_link.rels[0])
    self.assertEqual("http://example.com/new", feed_link.href)
    self.assertEqual(OPDS2MediaTypesRegistry.OPDS_FEED.key, feed_link.type)

    self.assertIsInstance(feed.publications, list)
    self.assertEqual(2, len(feed.publications))

    # First publication: metadata
    moby_dick = feed.publications[0]

    self.assertIsInstance(moby_dick.metadata, PresentationMetadata)
    self.assertEqual("http://schema.org/Book", moby_dick.metadata.type)
    self.assertEqual("Moby-Dick", moby_dick.metadata.title)
    self.assertEqual(
        [Contributor(name="Herman Melville", roles=[], links=LinkList())],
        moby_dick.metadata.authors,
    )
    self.assertEqual("urn:isbn:978-3-16-148410-0", moby_dick.metadata.identifier)
    self.assertEqual(["en"], moby_dick.metadata.languages)
    self.assertEqual(
        datetime.datetime(2015, 9, 29, 17, 0, tzinfo=tzutc()),
        moby_dick.metadata.modified,
    )

    # First publication: links
    self.assertIsInstance(moby_dick.links, list)
    self.assertEqual(len(moby_dick.links), 2)

    self_link = first_or_default(
        moby_dick.links.get_by_rel(OPDS2LinkRelationsRegistry.SELF.key)
    )
    self.assertEqual(OPDS2LinkRelationsRegistry.SELF.key, self_link.rels[0])
    self.assertEqual("http://example.org/publication.json", self_link.href)
    self.assertEqual(OPDS2MediaTypesRegistry.OPDS_PUBLICATION.key, self_link.type)

    open_access_link = first_or_default(
        moby_dick.links.get_by_rel(OPDS2LinkRelationsRegistry.OPEN_ACCESS.key)
    )
    self.assertEqual(
        OPDS2LinkRelationsRegistry.OPEN_ACCESS.key, open_access_link.rels[0]
    )
    self.assertEqual("http://example.org/file.epub", open_access_link.href)
    self.assertEqual(
        OPDS2MediaTypesRegistry.EPUB_PUBLICATION_PACKAGE.key, open_access_link.type
    )

    # First publication: images
    self.assertIsInstance(moby_dick.images, CompactCollection)
    self.assertIsInstance(moby_dick.images.links, list)
    self.assertEqual(3, len(moby_dick.images.links))

    large_jpeg_cover = first_or_default(
        moby_dick.images.links.get_by_href("http://example.org/cover.jpg")
    )
    self.assertEqual([], large_jpeg_cover.rels)
    self.assertEqual("http://example.org/cover.jpg", large_jpeg_cover.href)
    self.assertEqual(OPDS2MediaTypesRegistry.JPEG.key, large_jpeg_cover.type)
    self.assertEqual(1400, large_jpeg_cover.height)
    self.assertEqual(800, large_jpeg_cover.width)

    small_jpeg_cover = first_or_default(
        moby_dick.images.links.get_by_href("http://example.org/cover-small.jpg")
    )
    self.assertEqual("http://example.org/cover-small.jpg", small_jpeg_cover.href)
    self.assertEqual(OPDS2MediaTypesRegistry.JPEG.key, small_jpeg_cover.type)
    self.assertEqual(700, small_jpeg_cover.height)
    self.assertEqual(400, small_jpeg_cover.width)

    svg_cover = first_or_default(
        moby_dick.images.links.get_by_href("http://example.org/cover.svg")
    )
    self.assertEqual(svg_cover.href, "http://example.org/cover.svg")
    self.assertEqual(svg_cover.type, OPDS2MediaTypesRegistry.SVG_XML.key)

    # Second publication: metadata
    huckleberry_finn = feed.publications[1]

    self.assertIsInstance(huckleberry_finn.metadata, PresentationMetadata)
    self.assertEqual("http://schema.org/Book", huckleberry_finn.metadata.type)
    self.assertEqual(
        "Adventures of Huckleberry Finn", huckleberry_finn.metadata.title
    )
    self.assertEqual(
        [
            Contributor(name="Mark Twain", roles=[], links=LinkList()),
            Contributor(name="Samuel Langhorne Clemens", roles=[], links=LinkList()),
        ],
        huckleberry_finn.metadata.authors,
    )
    self.assertEqual(
        "urn:isbn:978-3-16-148410-0", huckleberry_finn.metadata.identifier
    )
    self.assertEqual(["eng", "fre"], huckleberry_finn.metadata.languages)
    self.assertEqual(
        datetime.datetime(2015, 9, 29, 0, 0, tzinfo=tzutc()),
        huckleberry_finn.metadata.published,
    )
    self.assertEqual(
        datetime.datetime(2015, 9, 29, 17, 0, 0, tzinfo=tzutc()),
        huckleberry_finn.metadata.modified,
    )

    # Second publication: borrow link and its indirect acquisition chain
    self.assertIsInstance(huckleberry_finn.links, list)

    borrow_link = first_or_default(
        huckleberry_finn.links.get_by_rel(OPDS2LinkRelationsRegistry.BORROW.key)
    )
    self.assertEqual(OPDS2LinkRelationsRegistry.BORROW.key, borrow_link.rels[0])
    self.assertEqual(OPDS2MediaTypesRegistry.OPDS_PUBLICATION.key, borrow_link.type)

    borrow_properties = borrow_link.properties
    self.assertIsInstance(borrow_properties, OPDS2LinkProperties)
    self.assertEqual(
        OPDS2AvailabilityType.AVAILABLE.value, borrow_properties.availability.state
    )
    self.assertEqual(2, len(borrow_properties.indirect_acquisition))

    adobe_acquisition = borrow_properties.indirect_acquisition[0]
    self.assertEqual("application/vnd.adobe.adept+xml", adobe_acquisition.type)
    self.assertEqual(1, len(adobe_acquisition.child))
    self.assertIsInstance(adobe_acquisition.child[0], OPDS2AcquisitionObject)
    self.assertEqual("application/epub+zip", adobe_acquisition.child[0].type)

    lcp_acquisition = borrow_properties.indirect_acquisition[1]
    self.assertEqual(
        "application/vnd.readium.lcp.license.v1.0+json", lcp_acquisition.type
    )
    self.assertEqual(1, len(lcp_acquisition.child))
    self.assertIsInstance(lcp_acquisition.child[0], OPDS2AcquisitionObject)
    self.assertEqual("application/epub+zip", lcp_acquisition.child[0].type)
def _extract_publication_metadata(self, feed, publication, data_source_name):
    """Extract a Metadata object from webpub-manifest-parser's publication.

    :param feed: Feed object
    :type feed: opds2_ast.OPDS2Feed

    :param publication: Publication object
    :type publication: opds2_ast.OPDS2Publication

    :param data_source_name: Data source's name
    :type data_source_name: str

    :return: Publication's metadata
    :rtype: Metadata
    """
    self._logger.debug(
        "Started extracting metadata from publication {0}".format(
            encode(publication)
        )
    )

    title = publication.metadata.title

    if title == OPDSFeed.NO_TITLE:
        title = None

    subtitle = publication.metadata.subtitle

    # Only the first declared language is kept.
    language = first_or_default(publication.metadata.languages)
    derived_medium = self._extract_medium_from_links(publication.links)
    medium = self._extract_medium(publication, derived_medium)

    publisher = first_or_default(publication.metadata.publishers)
    if publisher:
        publisher = publisher.name

    imprint = first_or_default(publication.metadata.imprints)
    if imprint:
        imprint = imprint.name

    published = publication.metadata.published
    subjects = self._extract_subjects(publication.metadata.subjects)

    # Contributor groups, in the order they appear in the resulting list.
    contributor_groups = (
        (publication.metadata.authors, Contributor.AUTHOR_ROLE),
        (publication.metadata.translators, Contributor.TRANSLATOR_ROLE),
        (publication.metadata.editors, Contributor.EDITOR_ROLE),
        (publication.metadata.artists, Contributor.ARTIST_ROLE),
        (publication.metadata.illustrators, Contributor.ILLUSTRATOR_ROLE),
        (publication.metadata.letterers, Contributor.LETTERER_ROLE),
        (publication.metadata.pencilers, Contributor.PENCILER_ROLE),
        (publication.metadata.colorists, Contributor.COLORIST_ROLE),
        (publication.metadata.inkers, Contributor.INKER_ROLE),
        (publication.metadata.narrators, Contributor.NARRATOR_ROLE),
        (publication.metadata.contributors, Contributor.CONTRIBUTOR_ROLE),
    )
    contributors = []
    for group, role in contributor_groups:
        contributors.extend(self._extract_contributors(group, role))

    # A feed without a self link used to fail here with AttributeError on
    # None.href; fall back to no base URL instead.
    # NOTE(review): relative links presumably stay unresolved in that case --
    # confirm that _extract_links accepts None as the base URL.
    feed_self_link = first_or_default(
        feed.links.get_by_rel(OPDS2LinkRelationsRegistry.SELF.key)
    )
    feed_self_url = feed_self_link.href if feed_self_link is not None else None

    links = self._extract_links(publication, feed_self_url)

    last_opds_update = publication.metadata.modified

    identifier = self._extract_identifier(publication)
    identifier_data = IdentifierData(
        type=identifier.type, identifier=identifier.identifier
    )

    # FIXME: There are no measurements in OPDS 2.0
    measurements = []

    # FIXME: There is no series information in OPDS 2.0
    series = None
    series_position = None

    # FIXME: It seems that OPDS 2.0 spec doesn't contain information about rights so we use the default one
    rights_uri = RightsStatus.rights_uri_from_string("")

    circulation_data = CirculationData(
        default_rights_uri=rights_uri,
        data_source=data_source_name,
        primary_identifier=identifier_data,
        links=links,
        licenses_owned=LicensePool.UNLIMITED_ACCESS,
        licenses_available=LicensePool.UNLIMITED_ACCESS,
        licenses_reserved=0,
        patrons_in_hold_queue=0,
        formats=[],
    )

    formats = self._find_formats_in_non_open_access_acquisition_links(
        publication.links, links, rights_uri, circulation_data
    )
    circulation_data.formats.extend(formats)

    metadata = Metadata(
        data_source=data_source_name,
        title=title,
        subtitle=subtitle,
        language=language,
        medium=medium,
        publisher=publisher,
        published=published,
        imprint=imprint,
        primary_identifier=identifier_data,
        subjects=subjects,
        contributors=contributors,
        measurements=measurements,
        series=series,
        series_position=series_position,
        links=links,
        data_source_last_updated=last_opds_update,
        circulation=circulation_data,
    )

    self._logger.debug(
        "Finished extracting metadata from publication {0}: {1}".format(
            encode(publication), encode(metadata)
        )
    )

    return metadata
def test(self):
    """Parse the RWPM spec example file and check its context, metadata, links and sub-collections."""
    # Arrange
    parser = RWPMDocumentParserFactory().create()
    manifest_file_path = os.path.join(
        os.path.dirname(__file__), "../../files/rwpm/spec_example.json"
    )

    # Act
    manifest = parser.parse_file(manifest_file_path)

    # Assert
    self.assertIsInstance(manifest.context, list)
    self.assertEqual(1, len(manifest.context))
    [context] = manifest.context
    self.assertEqual(context, "https://readium.org/webpub-manifest/context.jsonld")

    # Metadata
    self.assertIsInstance(manifest.metadata, Metadata)
    self.assertEqual("http://schema.org/Book", manifest.metadata.type)
    self.assertEqual("Moby-Dick", manifest.metadata.title)
    self.assertEqual(
        [Contributor(name="Herman Melville", roles=[], links=LinkList())],
        manifest.metadata.authors,
    )
    self.assertEqual("urn:isbn:978031600000X", manifest.metadata.identifier)
    self.assertEqual(["en"], manifest.metadata.languages)
    self.assertEqual(
        datetime.datetime(2015, 9, 29, 17, 0, 0), manifest.metadata.modified
    )

    # Manifest links
    self.assertIsInstance(manifest.links, list)
    self.assertEqual(3, len(manifest.links))

    manifest_self_link = first_or_default(
        manifest.links.get_by_rel(RWPMLinkRelationsRegistry.SELF.key)
    )
    self.assertIsNotNone(manifest_self_link)
    self.assertIn(RWPMLinkRelationsRegistry.SELF.key, manifest_self_link.rels)
    self.assertEqual("https://example.com/manifest.json", manifest_self_link.href)
    self.assertEqual(RWPMMediaTypesRegistry.MANIFEST.key, manifest_self_link.type)

    epub_link = first_or_default(
        manifest.links.get_by_rel(RWPMLinkRelationsRegistry.ALTERNATE.key)
    )
    self.assertIsNotNone(epub_link)
    self.assertIn(RWPMLinkRelationsRegistry.ALTERNATE.key, epub_link.rels)
    self.assertEqual("https://example.com/publication.epub", epub_link.href)
    self.assertEqual(
        RWPMMediaTypesRegistry.EPUB_PUBLICATION_PACKAGE.key, epub_link.type
    )

    query_link = first_or_default(
        manifest.links.get_by_rel(RWPMLinkRelationsRegistry.SEARCH.key)
    )
    self.assertIsNotNone(query_link)
    self.assertIn(RWPMLinkRelationsRegistry.SEARCH.key, query_link.rels)
    self.assertEqual("https://example.com/search{?query}", query_link.href)
    self.assertEqual(RWPMMediaTypesRegistry.HTML.key, query_link.type)

    # Reading order
    self.assertIsInstance(manifest.reading_order, CompactCollection)
    self.assertIsInstance(manifest.reading_order.links, list)
    self.assertEqual(2, len(manifest.reading_order.links))

    first_chapter = manifest.reading_order.links[0]
    self.assertEqual("https://example.com/c001.html", first_chapter.href)
    self.assertEqual(RWPMMediaTypesRegistry.HTML.key, first_chapter.type)
    self.assertEqual("Chapter 1", first_chapter.title)

    second_chapter = manifest.reading_order.links[1]
    self.assertEqual("https://example.com/c002.html", second_chapter.href)
    self.assertEqual(RWPMMediaTypesRegistry.HTML.key, second_chapter.type)
    self.assertEqual("Chapter 2", second_chapter.title)

    # Resources
    resources = manifest.resources
    self.assertEqual(5, len(resources.links))

    self.assertEqual([RWPMLinkRelationsRegistry.COVER.key], resources.links[0].rels)
    self.assertEqual("https://example.com/cover.jpg", resources.links[0].href)
    self.assertEqual(RWPMMediaTypesRegistry.JPEG.key, resources.links[0].type)
    self.assertEqual(600, resources.links[0].height)
    self.assertEqual(400, resources.links[0].width)

    # The remaining resources are only checked for href and media type.
    expected_resources = [
        ("https://example.com/style.css", RWPMMediaTypesRegistry.CSS.key),
        ("https://example.com/whale.jpg", RWPMMediaTypesRegistry.JPEG.key),
        ("https://example.com/boat.svg", RWPMMediaTypesRegistry.SVG_XML.key),
        ("https://example.com/notes.html", RWPMMediaTypesRegistry.HTML.key),
    ]
    for index, (expected_href, expected_type) in enumerate(expected_resources, 1):
        self.assertEqual(expected_href, resources.links[index].href)
        self.assertEqual(expected_type, resources.links[index].type)
def export(
    self, feed_url: str, feed_login: str, feed_password: str, output_file: str
) -> None:
    """Export one tab-separated row per publication of a paginated feed.

    The output starts with a header row. Each following row holds the URL of
    the page the publication came from, the page number, and the
    publication's title, identifier, self link and open-access acquisition
    link (empty when the publication has no such link). Pages are followed
    through their "next" links.

    If a page cannot be processed, the error is logged and the export stops;
    rows written so far are kept.

    :param feed_url: URL of the first page of the feed
    :param feed_login: Login passed to ``_parse_feed``
    :param feed_password: Password passed to ``_parse_feed``
    :param output_file: Path of the file to write
    """
    self._logger.info(f"Started exporting {feed_url}")

    row_template = "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\n"

    # A separate name for the handle keeps the `output_file` parameter intact.
    with open(output_file, "w") as output:
        output.write(
            row_template.format(
                "url",
                "page",
                "title",
                "identifier",
                "self_link",
                "oa_acquisition_link",
            )
        )

        page = 1

        while True:
            try:
                feed = self._parse_feed(feed_url, feed_login, feed_password)

                for publication in feed.publications:
                    identifier = publication.metadata.identifier
                    # NOTE(review): as written this replaces an apostrophe with
                    # itself and changes nothing; the intended source character
                    # may have been lost -- confirm.
                    title = publication.metadata.title.replace(
                        "'", "'"
                    )

                    self_link = first_or_default(
                        publication.links.get_by_rel(
                            OPDS2LinkRelationsRegistry.SELF.key
                        )
                    )
                    self_link_href = self_link.href if self_link is not None else ""

                    oa_acquisition_link = first_or_default(
                        publication.links.get_by_rel(
                            OPDS2LinkRelationsRegistry.OPEN_ACCESS.key
                        )
                    )
                    oa_acquisition_link_href = (
                        oa_acquisition_link.href
                        if oa_acquisition_link is not None
                        else ""
                    )

                    output.write(
                        row_template.format(
                            feed_url,
                            page,
                            title,
                            identifier,
                            self_link_href,
                            oa_acquisition_link_href,
                        )
                    )

                next_link = first_or_default(feed.links.get_by_rel("next"))

                if not next_link:
                    break

                feed_url = next_link.href
                page += 1
            except Exception:
                self._logger.exception(
                    "An unexpected error occurred during parsing {0}".format(feed_url)
                )
                # feed_url is unchanged at this point, so looping again would
                # retry the same page forever on a persistent failure.
                break

    output_file_path = os.path.join(os.getcwd(), output.name)

    self._logger.info(f"Finished exporting. The results have been saved to {output_file_path}")