Exemplos de first_or_default em Python, exemplos de webpub_manifest_parser.utils.first_or_default em Python

Exemplo n.º 1

0

Exibir arquivo

    def _set_scalar_value(self, json_content, ast_object):
        """Initialize an object's single required property from a scalar string value.

        A scalar value can only be mapped onto an object whose class declares
        exactly one required property. After that property is set, the object's
        other properties are initialized through ``_set_non_scalar_value``.

        :param json_content: Scalar string value containing a required object's property
        :type json_content: str

        :param ast_object: AST object
        :type ast_object: Node

        :raises BaseSyntaxError: If the object's class does not declare exactly one required property
        """
        required_properties = PropertiesGrouping.get_required_class_properties(
            ast_object.__class__)
        required_properties_count = len(required_properties)

        if required_properties_count != 1:
            raise BaseSyntaxError(
                u"There are {0} required properties in {1} but only a single value ({2}) was provided"
                .format(required_properties_count, encode(ast_object),
                        json_content))

        # Exactly one item is left at this point, so it can be unpacked directly
        property_name, property_object = first_or_default(required_properties)

        self._set_property_value(
            ast_object,
            property_name,
            property_object,
            json_content,
        )

        # We need to initialize other properties with default values
        # NOTE(review): the set passed last presumably lists properties to skip - confirm
        self._set_non_scalar_value(None, ast_object, {property_name})

Exemplo n.º 2

0

Exibir arquivo

Arquivo: test_syntax.py Projeto: ThePalaceProject/webpub-manifest-parser

    def test_syntax_analyzer_raises_missing_property_error_correctly(
        self,
        _,
        rwpm_manifest_content,
        expected_class_with_missing_property,
        expected_missing_property,
    ):
        """Check that the first recorded error refers to the expected node class and property.

        :param rwpm_manifest_content: Manifest document as a string
        :param expected_class_with_missing_property: Expected class of the error's node
        :param expected_missing_property: Expected key of the error's node property
        """
        # Arrange
        analyzer = RWPMSyntaxAnalyzer()
        input_stream = six.StringIO(rwpm_manifest_content)
        manifest_json = ManifestParser.get_manifest_json(input_stream)

        # Act
        analyzer.analyze(manifest_json)

        # Assert
        first_error = first_or_default(analyzer.context.errors)
        self.assertIsNotNone(first_error)

        self.assertEqual(expected_class_with_missing_property,
                         first_error.node.__class__)
        self.assertEqual(expected_missing_property,
                         first_error.node_property.key)

Exemplo n.º 3

0

Exibir arquivo

    def _extract_medium_from_links(self, links):
        """Derive the publication's medium from the first suitable acquisition link.

        Links without relations, without a type, or rejected by
        ``_is_acquisition_link`` are ignored. The search stops at the first
        link whose media type maps to a truthy medium.

        :param links: List of links
        :type links: ast_core.LinkList

        :return: Publication's medium
        :rtype: Optional[str]
        """
        medium = None

        for link in links:
            is_candidate = (link.rels and link.type
                            and self._is_acquisition_link(link))
            if not is_candidate:
                continue

            # Only the first (media type, DRM scheme) pair of the link is considered
            media_type, _ = first_or_default(
                self._extract_media_types_and_drm_scheme_from_link(link),
                default=(None, None),
            )
            medium = Edition.medium_from_media_type(media_type)

            if medium:
                return medium

        return medium

Exemplo n.º 4

0

Exibir arquivo

Arquivo: semantic.py Projeto: ThePalaceProject/webpub-manifest-parser

    def _check_manifest_self_link(self, node):
        """Verify that the manifest has a self link with a parseable href.

        Every link without relations is reported inside ``_record_errors``.
        A missing self link or a self link whose href cannot be parsed as a URI
        is raised directly.

        :param node: Manifest-like node
        :type node: Manifestlike
        """
        for link in node.links:
            if link.rels:
                continue

            with self._record_errors():
                raise MANIFEST_LINK_MISSING_REL_PROPERTY_ERROR(
                    node=link, node_property=Link.rels)

        self_links = node.links.get_by_rel(LinkRelationsRegistry.SELF.key)
        self_link = first_or_default(self_links)

        if self_link is None:
            raise MANIFEST_MISSING_SELF_LINK_ERROR(node=node,
                                                   node_property=None)

        uri_parser = URIParser()

        try:
            uri_parser.parse(self_link.href)
        except ValueParserError:
            raise MANIFEST_SELF_LINK_WRONG_HREF_FORMAT_ERROR(
                node=self_link, node_property=Link.href)

Exemplo n.º 5

0

Exibir arquivo

Arquivo: semantic.py Projeto: NYPL-Simplified/python-webpub-manifest-parser

    def visit(self, node):
        """Run semantic analysis over a manifest node.

        The node's metadata and links are visited first, then the links are
        checked: every link must have relations, and a self link with a
        parseable href must exist. Sub-collections are visited last.

        :param node: Manifest-like node
        :type node: Manifestlike
        """
        logger = self._logger
        logger.debug(u"Started processing {0}".format(encode(node)))

        node.metadata.accept(self)
        node.links.accept(self)

        if any(not link.rels for link in node.links):
            raise MISSING_MANIFEST_LINK_REL_PROPERTY_ERROR

        self_links = node.links.get_by_rel(RWPMLinkRelationsRegistry.SELF.key)
        self_link = first_or_default(self_links)

        if self_link is None:
            raise MISSING_SELF_LINK_ERROR

        uri_parser = URIParser()

        try:
            uri_parser.parse(self_link.href)
        except ValueParsingError:
            raise WRONG_SELF_LINK_HREF_FORMAT

        node.sub_collections.accept(self)

        logger.debug(u"Finished processing {0}".format(encode(node)))

Exemplo n.º 6

0

Exibir arquivo

    def test_first_or_default(self,
                              _,
                              collection,
                              expected_result,
                              default_value=None):
        """Check that first_or_default returns the expected value for the given collection.

        :param collection: Collection passed to first_or_default
        :param expected_result: Value first_or_default is expected to return
        :param default_value: Default value passed to first_or_default
        """
        eq_(first_or_default(collection, default_value), expected_result)

Exemplo n.º 7

0

Exibir arquivo

Arquivo: test_production_feeds.py Projeto: ThePalaceProject/webpub-manifest-parser

    def test_dpla_feed(self,
                       feed_name,
                       feed_url,
                       feed_encoding="utf-8",
                       feed_auth=None):
        """Walk a real production feed page by page and check that every page parses.

        Starting from ``feed_url``, each page is parsed, handed to
        ``_print_errors`` and then its "next" link (if any) is followed until
        a page without such a link is reached.

        :param feed_name: Feed's name
        :type feed_name: str

        :param feed_url: URL of the feed's first page
        :type feed_url: str

        :param feed_encoding: Feed's encoding
        :type feed_encoding: str

        :param feed_auth: Feed's authentication information
        :type feed_auth: requests.auth.AuthBase
        """
        # Arrange

        # NOTE: logging.basicConfig doesn't work here because there are no associated handlers,
        # so the root logger's level is set manually
        logging.root.level = logging.WARNING

        parser = ODLFeedParserFactory().create()
        page_url = feed_url

        # Act
        while True:
            try:
                page = parser.parse_url(page_url,
                                        feed_encoding,
                                        auth=feed_auth)
            except Exception:
                # Log with the feed's name for context and let the test fail
                logging.exception(
                    "Unexpected exception occurred during parsing {0}".format(
                        feed_name))
                raise

            # Assert
            self.assertIsInstance(page, ManifestParserResult)

            self._print_errors(feed_name, page_url, page)

            next_link = first_or_default(page.root.links.get_by_rel("next"))

            if next_link:
                page_url = next_link.href
            else:
                break

Exemplo n.º 8

0

Exibir arquivo

    def visit(self, node):  # pylint: disable=E0102
        """Run semantic analysis over an ODL license node.

        Two links are checked: a self link typed as an ODL License Info
        Document and a borrow link typed as an ODL License Status Document.
        A missing or wrongly typed link is reported inside ``_record_errors``.

        :param node: ODLLicense node
        :type node: ODLLicense
        """
        self_link = None
        if node.links:
            self_link = first_or_default(
                node.links.get_by_rel(OPDS2LinkRelationsRegistry.SELF.key)
            )

        self_link_is_invalid = (
            not self_link
            or self_link.type != ODLMediaTypesRegistry.ODL_LICENSE_INFO_DOCUMENT.key
        )
        if self_link_is_invalid:
            with self._record_errors():
                raise ODL_LICENSE_MUST_CONTAIN_SELF_LINK_TO_LICENSE_INFO_DOCUMENT_ERROR(
                    node=node, node_property=None
                )

        borrow_link = None
        if node.links:
            borrow_link = first_or_default(
                node.links.get_by_rel(OPDS2LinkRelationsRegistry.BORROW.key)
            )

        borrow_link_is_invalid = (
            not borrow_link
            or borrow_link.type != ODLMediaTypesRegistry.ODL_LICENSE_STATUS_DOCUMENT.key
        )
        if borrow_link_is_invalid:
            with self._record_errors():
                raise ODL_LICENSE_MUST_CONTAIN_CHECKOUT_LINK_TO_LICENSE_STATUS_DOCUMENT_ERROR(
                    node=node, node_property=None
                )

Exemplo n.º 9

0

Exibir arquivo

Arquivo: test_syntax.py Projeto: ThePalaceProject/webpub-manifest-parser

    def test_syntax_analyzer_raises_value_parsing_error_when_property_has_incorrect_value(
            self, _, rwpm_manifest_content, expected_error_message):
        """Check that the first recorded error carries the expected message.

        :param rwpm_manifest_content: Manifest document as a string
        :param expected_error_message: Expected text of the first recorded error
        """
        # Arrange
        analyzer = RWPMSyntaxAnalyzer()
        input_stream = six.StringIO(rwpm_manifest_content)
        manifest_json = ManifestParser.get_manifest_json(input_stream)

        # Act
        analyzer.analyze(manifest_json)

        # Assert
        first_error = first_or_default(analyzer.context.errors)
        self.assertIsNotNone(first_error)

        actual_error_message = six.text_type(first_error).strip("u")
        self.assertEqual(expected_error_message, actual_error_message)

Exemplo n.º 10

0

Exibir arquivo

    def _extract_media_types_and_drm_scheme_from_link(self, link):
        """Extract information about content's media type and used DRM schema from the link.

        For a link with properties, every indirect acquisition object yields one
        pair, provided the link has no availability information or its state is
        "available". The media type is taken from the innermost acquisition
        object reached by following the first child at each level; the DRM
        scheme is taken from the outermost object. For a link without
        properties, the link's own type is used when it is a known book or
        audiobook media type.

        :param link: Link object
        :type link: ast_core.Link

        :return: List of 2-tuples containing the content's media type and its DRM schema
        :rtype: List[Tuple[str, str]]
        """
        self._logger.debug(
            "Started extracting media types and a DRM scheme from {0}".format(
                encode(link)))

        media_types_and_drm_scheme = []

        if link.properties:
            if (not link.properties.availability
                    or link.properties.availability.state
                    == opds2_ast.OPDS2AvailabilityType.AVAILABLE.value):
                for acquisition_object in link.properties.indirect_acquisition:
                    nested_acquisition_object = acquisition_object

                    # Descend from the CURRENT nested object. Re-reading the
                    # top-level object's children here would never get past the
                    # first level and would loop forever on deeper nesting.
                    while nested_acquisition_object.child:
                        nested_acquisition_object = first_or_default(
                            nested_acquisition_object.child)

                    # The outermost object's type is the DRM scheme only if it is a known one
                    drm_scheme = (acquisition_object.type
                                  if acquisition_object.type
                                  in DeliveryMechanism.KNOWN_DRM_TYPES else
                                  DeliveryMechanism.NO_DRM)

                    media_types_and_drm_scheme.append(
                        (nested_acquisition_object.type, drm_scheme))
        else:
            if (link.type in MediaTypes.BOOK_MEDIA_TYPES
                    or link.type in MediaTypes.AUDIOBOOK_MEDIA_TYPES):
                media_types_and_drm_scheme.append(
                    (link.type, DeliveryMechanism.NO_DRM))

        self._logger.debug(
            "Finished extracting media types and a DRM scheme from {0}: {1}".
            format(encode(link), encode(media_types_and_drm_scheme)))

        return media_types_and_drm_scheme

Exemplo n.º 11

0

Exibir arquivo

    def _extract_link(self, link, feed_self_url, default_link_rel=None):
        """Build a LinkData object from webpub-manifest-parser's link.

        The link's first relation is used, falling back to ``default_link_rel``.
        A href without a network location is resolved against ``feed_self_url``
        when the latter is provided.

        :param link: webpub-manifest-parser's link
        :type link: ast_core.Link

        :param feed_self_url: Feed's self URL
        :type feed_self_url: str

        :param default_link_rel: Default link's relation
        :type default_link_rel: Optional[str]

        :return: Link metadata
        :rtype: LinkData
        """
        logger = self._logger
        logger.debug("Started extracting link metadata from {0}".format(
            encode(link)))

        # FIXME: It seems that OPDS 2.0 spec doesn't contain information about rights so we use the default one.
        default_rights_uri = RightsStatus.rights_uri_from_string("")
        link_rel = first_or_default(link.rels, default_link_rel)
        link_type = link.type
        link_href = link.href

        is_relative = feed_self_url and not urlparse(link_href).netloc
        if is_relative:
            # Turn the relative href into an absolute URL
            link_href = urljoin(feed_self_url, link_href)

        result = LinkData(
            rel=link_rel,
            href=link_href,
            media_type=link_type,
            rights_uri=default_rights_uri,
            content=None,
        )

        logger.debug(
            "Finished extracting link metadata from {0}: {1}".format(
                encode(link), encode(result)))

        return result

Exemplo n.º 12

0

Exibir arquivo

    def test(self):
        """Parse the sample ODL feed file and verify the parsed license in detail."""
        # Arrange
        parser = ODLFeedParserFactory().create()
        feed_file_path = os.path.join(
            os.path.dirname(__file__), "../../files/odl/feed.json"
        )

        # Act
        result = parser.parse_file(feed_file_path)

        # Assert
        self.assertIsInstance(result, ManifestParserResult)
        self.assertEqual(0, len(result.errors))

        feed = result.root
        self.assertIsInstance(feed.metadata, OPDS2FeedMetadata)
        self.assertEqual("Test", feed.metadata.title)

        self.assertEqual(1, len(feed.publications))
        [publication] = feed.publications

        self.assertEqual(1, len(publication.licenses))
        [odl_license] = publication.licenses

        # General license metadata
        license_metadata = odl_license.metadata
        self.assertEqual(
            "urn:uuid:f7847120-fc6f-11e3-8158-56847afe9799",
            license_metadata.identifier,
        )
        self.assertEqual(["application/epub+zip"], license_metadata.formats)

        self.assertEqual("USD", license_metadata.price.currency)
        self.assertEqual(7.99, license_metadata.price.value)

        self.assertEqual(
            datetime.datetime(2014, 4, 25, 12, 25, 21, tzinfo=tzoffset(None, 7200)),
            license_metadata.created,
        )

        # License terms
        terms = license_metadata.terms
        self.assertEqual(30, terms.checkouts)
        self.assertEqual(
            datetime.datetime(2016, 4, 25, 12, 25, 21, tzinfo=tzoffset(None, 7200)),
            terms.expires,
        )
        self.assertEqual(10, terms.concurrency)
        self.assertEqual(5097600, terms.length)

        # License protection
        protection = license_metadata.protection
        self.assertEqual(
            [
                u"application/vnd.adobe.adept+xml",
                u"application/vnd.readium.lcp.license.v1.0+json",
            ],
            protection.formats,
        )
        self.assertEqual(6, protection.devices)
        self.assertEqual(False, protection.copy_allowed)
        self.assertEqual(False, protection.print_allowed)
        self.assertEqual(False, protection.tts_allowed)

        # License links
        license_links = odl_license.links
        self.assertEqual(2, len(license_links))

        borrow_link = first_or_default(
            license_links.get_by_rel(OPDS2LinkRelationsRegistry.BORROW.key)
        )
        self.assertEqual(
            "application/vnd.readium.license.status.v1.0+json", borrow_link.type
        )

        self_link = first_or_default(
            license_links.get_by_rel(OPDS2LinkRelationsRegistry.SELF.key)
        )
        self.assertEqual("application/vnd.odl.info+json", self_link.type)

Exemplo n.º 13

0

Exibir arquivo

Arquivo: test_parser.py Projeto: ThePalaceProject/webpub-manifest-parser

    def test(self):
        """Parse the sample OPDS 2.0 feed file and verify the feed and both of its publications."""
        # Arrange
        parser = OPDS2FeedParserFactory().create()
        feed_file_path = os.path.join(os.path.dirname(__file__),
                                      "../../files/opds2/feed.json")

        # Act
        result = parser.parse_file(feed_file_path)

        # Assert
        self.assertIsInstance(result, ManifestParserResult)
        self.assertEqual(0, len(result.errors))

        feed = result.root
        self.assertIsInstance(feed, OPDS2Feed)

        self.assertIsInstance(feed.metadata, OPDS2FeedMetadata)
        self.assertEqual("Example listing publications", feed.metadata.title)

        # Feed links
        self.assertIsInstance(feed.links, list)
        self.assertEqual(1, len(feed.links))
        [feed_link] = feed.links
        self.assertEqual(OPDS2LinkRelationsRegistry.SELF.key,
                         feed_link.rels[0])
        self.assertEqual("http://example.com/new", feed_link.href)
        self.assertEqual(OPDS2MediaTypesRegistry.OPDS_FEED.key,
                         feed_link.type)

        self.assertIsInstance(feed.publications, list)
        self.assertEqual(2, len(feed.publications))

        # First publication: metadata
        moby_dick = feed.publications[0]

        self.assertIsInstance(moby_dick.metadata, PresentationMetadata)
        self.assertEqual("http://schema.org/Book", moby_dick.metadata.type)
        self.assertEqual("Moby-Dick", moby_dick.metadata.title)
        self.assertEqual(
            [Contributor(name="Herman Melville", roles=[], links=LinkList())],
            moby_dick.metadata.authors,
        )
        self.assertEqual("urn:isbn:978-3-16-148410-0",
                         moby_dick.metadata.identifier)
        self.assertEqual(["en"], moby_dick.metadata.languages)
        self.assertEqual(
            datetime.datetime(2015, 9, 29, 17, 0, tzinfo=tzutc()),
            moby_dick.metadata.modified,
        )

        # First publication: links
        self.assertIsInstance(moby_dick.links, list)
        self.assertEqual(len(moby_dick.links), 2)

        self_link = first_or_default(
            moby_dick.links.get_by_rel(OPDS2LinkRelationsRegistry.SELF.key))
        self.assertEqual(OPDS2LinkRelationsRegistry.SELF.key,
                         self_link.rels[0])
        self.assertEqual("http://example.org/publication.json",
                         self_link.href)
        self.assertEqual(OPDS2MediaTypesRegistry.OPDS_PUBLICATION.key,
                         self_link.type)

        open_access_link = first_or_default(
            moby_dick.links.get_by_rel(
                OPDS2LinkRelationsRegistry.OPEN_ACCESS.key))
        self.assertEqual(
            OPDS2LinkRelationsRegistry.OPEN_ACCESS.key,
            open_access_link.rels[0],
        )
        self.assertEqual("http://example.org/file.epub",
                         open_access_link.href)
        self.assertEqual(
            OPDS2MediaTypesRegistry.EPUB_PUBLICATION_PACKAGE.key,
            open_access_link.type,
        )

        # First publication: images
        self.assertIsInstance(moby_dick.images, CompactCollection)
        self.assertIsInstance(moby_dick.images.links, list)
        self.assertEqual(3, len(moby_dick.images.links))

        jpeg_cover = first_or_default(
            moby_dick.images.links.get_by_href(
                "http://example.org/cover.jpg"))
        self.assertEqual([], jpeg_cover.rels)
        self.assertEqual("http://example.org/cover.jpg", jpeg_cover.href)
        self.assertEqual(OPDS2MediaTypesRegistry.JPEG.key, jpeg_cover.type)
        self.assertEqual(1400, jpeg_cover.height)
        self.assertEqual(800, jpeg_cover.width)

        small_jpeg_cover = first_or_default(
            moby_dick.images.links.get_by_href(
                "http://example.org/cover-small.jpg"))
        self.assertEqual("http://example.org/cover-small.jpg",
                         small_jpeg_cover.href)
        self.assertEqual(OPDS2MediaTypesRegistry.JPEG.key,
                         small_jpeg_cover.type)
        self.assertEqual(700, small_jpeg_cover.height)
        self.assertEqual(400, small_jpeg_cover.width)

        svg_cover = first_or_default(
            moby_dick.images.links.get_by_href(
                "http://example.org/cover.svg"))
        self.assertEqual(svg_cover.href, "http://example.org/cover.svg")
        self.assertEqual(svg_cover.type,
                         OPDS2MediaTypesRegistry.SVG_XML.key)

        # Second publication: metadata
        huckleberry_finn = feed.publications[1]
        self.assertIsInstance(huckleberry_finn.metadata, PresentationMetadata)
        self.assertEqual("http://schema.org/Book",
                         huckleberry_finn.metadata.type)
        self.assertEqual("Adventures of Huckleberry Finn",
                         huckleberry_finn.metadata.title)
        self.assertEqual(
            [
                Contributor(name="Mark Twain", roles=[], links=LinkList()),
                Contributor(name="Samuel Langhorne Clemens",
                            roles=[],
                            links=LinkList()),
            ],
            huckleberry_finn.metadata.authors,
        )
        self.assertEqual("urn:isbn:978-3-16-148410-0",
                         huckleberry_finn.metadata.identifier)
        self.assertEqual(["eng", "fre"], huckleberry_finn.metadata.languages)
        self.assertEqual(
            datetime.datetime(2015, 9, 29, 0, 0, tzinfo=tzutc()),
            huckleberry_finn.metadata.published,
        )
        self.assertEqual(
            datetime.datetime(2015, 9, 29, 17, 0, 0, tzinfo=tzutc()),
            huckleberry_finn.metadata.modified,
        )

        # Second publication: borrow link and its properties
        self.assertIsInstance(huckleberry_finn.links, list)

        borrow_link = first_or_default(
            huckleberry_finn.links.get_by_rel(
                OPDS2LinkRelationsRegistry.BORROW.key))
        self.assertEqual(OPDS2LinkRelationsRegistry.BORROW.key,
                         borrow_link.rels[0])
        self.assertEqual(
            OPDS2MediaTypesRegistry.OPDS_PUBLICATION.key,
            borrow_link.type,
        )

        borrow_properties = borrow_link.properties
        self.assertIsInstance(borrow_properties, OPDS2LinkProperties)

        self.assertEqual(OPDS2AvailabilityType.AVAILABLE.value,
                         borrow_properties.availability.state)

        self.assertEqual(2, len(borrow_properties.indirect_acquisition))

        adobe_acquisition = borrow_properties.indirect_acquisition[0]
        self.assertEqual("application/vnd.adobe.adept+xml",
                         adobe_acquisition.type)
        self.assertEqual(1, len(adobe_acquisition.child))
        self.assertIsInstance(adobe_acquisition.child[0],
                              OPDS2AcquisitionObject)
        self.assertEqual("application/epub+zip",
                         adobe_acquisition.child[0].type)

        lcp_acquisition = borrow_properties.indirect_acquisition[1]
        self.assertEqual(
            "application/vnd.readium.lcp.license.v1.0+json",
            lcp_acquisition.type,
        )
        self.assertEqual(1, len(lcp_acquisition.child))
        self.assertIsInstance(lcp_acquisition.child[0],
                              OPDS2AcquisitionObject)
        self.assertEqual("application/epub+zip",
                         lcp_acquisition.child[0].type)

Exemplo n.º 14

0

Exibir arquivo

    def _extract_publication_metadata(self, feed, publication,
                                      data_source_name):
        """Extract a Metadata object from webpub-manifest-parser's publication.

        The feed is only used to look up its self link, which is passed to
        ``_extract_links`` together with the publication. If the feed has no
        self link, ``None`` is passed instead.

        :param feed: Feed object
        :type feed: opds2_ast.OPDS2Feed

        :param publication: Publication object
        :type publication: opds2_ast.OPDS2Publication

        :param data_source_name: Data source's name
        :type data_source_name: str

        :return: Publication's metadata
        :rtype: Metadata
        """
        self._logger.debug(
            "Started extracting metadata from publication {0}".format(
                encode(publication)))

        title = publication.metadata.title

        if title == OPDSFeed.NO_TITLE:
            title = None

        subtitle = publication.metadata.subtitle

        # Only the first entry of each multi-valued property is used
        language = first_or_default(publication.metadata.languages)
        derived_medium = self._extract_medium_from_links(publication.links)
        medium = self._extract_medium(publication, derived_medium)

        publisher = first_or_default(publication.metadata.publishers)
        if publisher:
            publisher = publisher.name

        imprint = first_or_default(publication.metadata.imprints)
        if imprint:
            imprint = imprint.name

        published = publication.metadata.published
        subjects = self._extract_subjects(publication.metadata.subjects)

        # Contributors are collected role by role, in this fixed order
        contributor_groups = (
            (publication.metadata.authors, Contributor.AUTHOR_ROLE),
            (publication.metadata.translators, Contributor.TRANSLATOR_ROLE),
            (publication.metadata.editors, Contributor.EDITOR_ROLE),
            (publication.metadata.artists, Contributor.ARTIST_ROLE),
            (publication.metadata.illustrators, Contributor.ILLUSTRATOR_ROLE),
            (publication.metadata.letterers, Contributor.LETTERER_ROLE),
            (publication.metadata.pencilers, Contributor.PENCILER_ROLE),
            (publication.metadata.colorists, Contributor.COLORIST_ROLE),
            (publication.metadata.inkers, Contributor.INKER_ROLE),
            (publication.metadata.narrators, Contributor.NARRATOR_ROLE),
            (publication.metadata.contributors, Contributor.CONTRIBUTOR_ROLE),
        )
        contributors = []
        for group_members, group_role in contributor_groups:
            contributors.extend(
                self._extract_contributors(group_members, group_role))

        # A feed without a self link must not crash the extraction
        feed_self_link = first_or_default(
            feed.links.get_by_rel(OPDS2LinkRelationsRegistry.SELF.key))
        feed_self_url = feed_self_link.href if feed_self_link is not None else None
        links = self._extract_links(publication, feed_self_url)

        last_opds_update = publication.metadata.modified

        identifier = self._extract_identifier(publication)
        identifier_data = IdentifierData(type=identifier.type,
                                         identifier=identifier.identifier)

        # FIXME: There are no measurements in OPDS 2.0
        measurements = []

        # FIXME: There is no series information in OPDS 2.0
        series = None
        series_position = None

        # FIXME: It seems that OPDS 2.0 spec doesn't contain information about rights so we use the default one
        rights_uri = RightsStatus.rights_uri_from_string("")

        circulation_data = CirculationData(
            default_rights_uri=rights_uri,
            data_source=data_source_name,
            primary_identifier=identifier_data,
            links=links,
            licenses_owned=LicensePool.UNLIMITED_ACCESS,
            licenses_available=LicensePool.UNLIMITED_ACCESS,
            licenses_reserved=0,
            patrons_in_hold_queue=0,
            formats=[],
        )

        # Formats found in non-open-access acquisition links are added after construction
        formats = self._find_formats_in_non_open_access_acquisition_links(
            publication.links, links, rights_uri, circulation_data)
        circulation_data.formats.extend(formats)

        metadata = Metadata(
            data_source=data_source_name,
            title=title,
            subtitle=subtitle,
            language=language,
            medium=medium,
            publisher=publisher,
            published=published,
            imprint=imprint,
            primary_identifier=identifier_data,
            subjects=subjects,
            contributors=contributors,
            measurements=measurements,
            series=series,
            series_position=series_position,
            links=links,
            data_source_last_updated=last_opds_update,
            circulation=circulation_data,
        )

        self._logger.debug(
            "Finished extracting metadata from publication {0}: {1}".format(
                encode(publication), encode(metadata)))

        return metadata

Exemplo n.º 15

0

Exibir arquivo

Arquivo: test_parser.py Projeto: NYPL-Simplified/python-webpub-manifest-parser

    def test(self):
        """Parse the RWPM spec example file and verify its metadata, links and sub-collections."""
        # Arrange
        parser = RWPMDocumentParserFactory().create()
        manifest_file_path = os.path.join(
            os.path.dirname(__file__), "../../files/rwpm/spec_example.json"
        )

        # Act
        manifest = parser.parse_file(manifest_file_path)

        # Assert
        self.assertIsInstance(manifest.context, list)
        self.assertEqual(1, len(manifest.context))
        [manifest_context] = manifest.context
        self.assertEqual(
            manifest_context, "https://readium.org/webpub-manifest/context.jsonld"
        )

        # Metadata
        self.assertIsInstance(manifest.metadata, Metadata)
        self.assertEqual("http://schema.org/Book", manifest.metadata.type)
        self.assertEqual("Moby-Dick", manifest.metadata.title)
        self.assertEqual(
            [Contributor(name="Herman Melville", roles=[], links=LinkList())],
            manifest.metadata.authors,
        )
        self.assertEqual("urn:isbn:978031600000X", manifest.metadata.identifier)
        self.assertEqual(["en"], manifest.metadata.languages)
        self.assertEqual(
            datetime.datetime(2015, 9, 29, 17, 0, 0), manifest.metadata.modified
        )

        # Manifest links
        self.assertIsInstance(manifest.links, list)
        self.assertEqual(3, len(manifest.links))

        self_rel = RWPMLinkRelationsRegistry.SELF.key
        self_link = first_or_default(manifest.links.get_by_rel(self_rel))
        self.assertIsNotNone(self_link)
        self.assertIn(RWPMLinkRelationsRegistry.SELF.key, self_link.rels)
        self.assertEqual("https://example.com/manifest.json", self_link.href)
        self.assertEqual(RWPMMediaTypesRegistry.MANIFEST.key, self_link.type)

        alternate_rel = RWPMLinkRelationsRegistry.ALTERNATE.key
        alternate_link = first_or_default(manifest.links.get_by_rel(alternate_rel))
        self.assertIsNotNone(alternate_link)
        self.assertIn(RWPMLinkRelationsRegistry.ALTERNATE.key, alternate_link.rels)
        self.assertEqual("https://example.com/publication.epub", alternate_link.href)
        self.assertEqual(
            RWPMMediaTypesRegistry.EPUB_PUBLICATION_PACKAGE.key, alternate_link.type
        )

        search_rel = RWPMLinkRelationsRegistry.SEARCH.key
        search_link = first_or_default(manifest.links.get_by_rel(search_rel))
        self.assertIsNotNone(search_link)
        self.assertIn(RWPMLinkRelationsRegistry.SEARCH.key, search_link.rels)
        self.assertEqual("https://example.com/search{?query}", search_link.href)
        self.assertEqual(RWPMMediaTypesRegistry.HTML.key, search_link.type)

        # Reading order
        self.assertIsInstance(manifest.reading_order, CompactCollection)
        self.assertIsInstance(manifest.reading_order.links, list)
        self.assertEqual(2, len(manifest.reading_order.links))

        first_chapter_link = manifest.reading_order.links[0]
        self.assertEqual("https://example.com/c001.html", first_chapter_link.href)
        self.assertEqual(RWPMMediaTypesRegistry.HTML.key, first_chapter_link.type)
        self.assertEqual("Chapter 1", first_chapter_link.title)

        second_chapter_link = manifest.reading_order.links[1]
        self.assertEqual("https://example.com/c002.html", second_chapter_link.href)
        self.assertEqual(RWPMMediaTypesRegistry.HTML.key, second_chapter_link.type)
        self.assertEqual("Chapter 2", second_chapter_link.title)

        # Resources
        resources = manifest.resources
        self.assertEqual(5, len(resources.links))

        self.assertEqual(
            [RWPMLinkRelationsRegistry.COVER.key],
            resources.links[0].rels,
        )
        self.assertEqual("https://example.com/cover.jpg", resources.links[0].href)
        self.assertEqual(RWPMMediaTypesRegistry.JPEG.key, resources.links[0].type)
        self.assertEqual(600, resources.links[0].height)
        self.assertEqual(400, resources.links[0].width)

        self.assertEqual("https://example.com/style.css", resources.links[1].href)
        self.assertEqual(RWPMMediaTypesRegistry.CSS.key, resources.links[1].type)

        self.assertEqual("https://example.com/whale.jpg", resources.links[2].href)
        self.assertEqual(RWPMMediaTypesRegistry.JPEG.key, resources.links[2].type)

        self.assertEqual("https://example.com/boat.svg", resources.links[3].href)
        self.assertEqual(RWPMMediaTypesRegistry.SVG_XML.key, resources.links[3].type)

        self.assertEqual("https://example.com/notes.html", resources.links[4].href)
        self.assertEqual(RWPMMediaTypesRegistry.HTML.key, resources.links[4].type)

Exemplo n.º 16

0

Exibir arquivo

Arquivo: exporter.py Projeto: vbessonov/market-feed-exporter

    def export(
        self, feed_url: str, feed_login: str, feed_password: str, output_file: str
    ) -> None:
        """Export the publications of a paginated feed into a tab-separated file.

        Starting at ``feed_url``, every page is parsed with ``self._parse_feed``
        and one row per publication is written with the columns ``url``,
        ``page``, ``title``, ``identifier``, ``self_link`` and
        ``oa_acquisition_link``. Pagination follows the feed's ``next`` link
        until a page has none.

        If parsing or writing a page fails, the error is logged and the export
        stops; rows written so far are kept in the output file.

        :param feed_url: URL of the first feed page
        :param feed_login: Login passed to ``self._parse_feed``
        :param feed_password: Password passed to ``self._parse_feed``
        :param output_file: Path of the file to (over)write with the results

        :raises OSError: If the output file cannot be opened for writing
        """
        self._logger.info(f"Started exporting {feed_url}")

        row_template = "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\n"

        # Explicit UTF-8 so that non-ASCII titles do not fail on platforms whose
        # default encoding cannot represent them. A separate name is used for
        # the handle so the ``output_file`` path parameter is not shadowed.
        with open(output_file, "w", encoding="utf-8") as output:
            output.write(
                row_template.format(
                    "url",
                    "page",
                    "title",
                    "identifier",
                    "self_link",
                    "oa_acquisition_link",
                )
            )

            page = 1

            while True:
                try:
                    feed = self._parse_feed(feed_url, feed_login, feed_password)

                    for publication in feed.publications:
                        identifier = publication.metadata.identifier
                        # Undo the HTML entity for an apostrophe in titles.
                        title = publication.metadata.title.replace(
                            "&#39;", "'"
                        )
                        self_link = first_or_default(
                            publication.links.get_by_rel(
                                OPDS2LinkRelationsRegistry.SELF.key
                            )
                        )
                        self_link_href = self_link.href if self_link is not None else ""
                        oa_acquisition_link = first_or_default(
                            publication.links.get_by_rel(
                                OPDS2LinkRelationsRegistry.OPEN_ACCESS.key
                            )
                        )
                        oa_acquisition_link_href = (
                            oa_acquisition_link.href
                            if oa_acquisition_link is not None
                            else ""
                        )

                        output.write(
                            row_template.format(
                                feed_url,
                                page,
                                title,
                                identifier,
                                self_link_href,
                                oa_acquisition_link_href,
                            )
                        )

                    next_link = first_or_default(feed.links.get_by_rel("next"))

                    if not next_link:
                        break

                    feed_url = next_link.href
                    page += 1
                except Exception:
                    self._logger.exception(
                        "An unexpected error occurred during parsing {0}".format(feed_url)
                    )
                    # ``feed_url`` was not advanced, so looping again would retry
                    # the same page forever (and duplicate any rows already
                    # written for it). Stop paginating instead.
                    break

        output_file_path = os.path.join(os.getcwd(), output_file)

        self._logger.info(f"Finished exporting. The results have been saved to {output_file_path}")