Python CUAHSIHarvester.retrieve_record示例

编程语言: Python

命名空间/包名称: schema_org.cuahsi

类/类型: CUAHSIHarvester

方法/功能: retrieve_record

hotexamples.com的示例: 3

Python CUAHSIHarvester.retrieve_record - 已找到3个示例。这些是从开源项目中提取的、最受好评的 schema_org.cuahsi.CUAHSIHarvester.retrieve_record 真实Python示例。您可以对示例进行评价，以帮助我们提高示例质量。

常用方法

显示隐藏

CUAHSIHarvester(12)

run(4)

retrieve_record(3)

get_jsonld(2)

preprocess_landing_page(2)

retrieve_landing_page_content(1)

示例#1

显示文件

文件： test_cuahsi.py 项目： jevans97utk/SlenderNodes

    def test_retrieve_record__no_url_for_zip_archive(self):
        """
        SCENARIO:  The landing page of a PUBLISHED document is requested, but
        the page served back carries no proper URL for the bagit zip archive.

        EXPECTED RESULT:  retrieve_record raises a SkipError.
        """
        # The only external I/O is the landing page, which is served from a
        # canned test resource.
        landing_page = ir.read_text(
            'tests.data.cuahsi.81e947faccf04de59392dddaac77bc75',
            'landing_page.no_zip_url.html',
        )

        url = ('https://www.hydroshare.org'
               '/resource/81e947faccf04de59392dddaac77bc75/')

        harvester = CUAHSIHarvester()

        # assertLogs wraps the whole exchange, so the test also requires that
        # the harvester logs at INFO level or above while handling the record.
        with self.assertLogs(logger=harvester.logger, level='INFO'), \
                aioresponses() as mocked:
            mocked.get(self.regex, body=landing_page)

            with self.assertRaises(SkipError):
                asyncio.run(harvester.retrieve_record(url))

示例#2

显示文件

文件： test_cuahsi.py 项目： jevans97utk/SlenderNodes

    def test_retrieve_record__bad_metadata_document(self):
        """
        SCENARIO:  We have a URL for a landing page for a PUBLISHED document.
        The metadata document inside the zip archive, however, is invalid
        (it is not XML at all).

        EXPECTED RESULT:  An XMLMetadataParsingError is issued.
        """
        url = ('https://www.hydroshare.org'
               '/resource/81e947faccf04de59392dddaac77bc75/')

        # External I/O
        #
        # 1st:  landing page
        # 2nd:  zip archive containing data and metadata
        package = 'tests.data.cuahsi.81e947faccf04de59392dddaac77bc75'
        contents1 = ir.read_text(package, 'landing_page.html')

        # Build the zip archive in memory, switching out the metadata
        # document for something that is NOT xml.  The context manager closes
        # the archive even if writestr raises, and the archive must be closed
        # before the bytes are read so that the zip trailer is written.
        buffer = io.BytesIO()
        with zipfile.ZipFile(buffer, mode='w') as zf:
            zf.writestr(
                '81e947faccf04de59392dddaac77bc75/data/resourcemetadata',
                b'not xml'
            )
        contents2 = buffer.getvalue()

        harvester = CUAHSIHarvester()

        with self.assertLogs(logger=harvester.logger, level='INFO'):
            with aioresponses() as m:
                m.get(self.regex, body=contents1)
                m.get(self.regex, body=contents2)

                with self.assertRaises(XMLMetadataParsingError):
                    asyncio.run(harvester.retrieve_record(url))

示例#3

显示文件

文件： test_cuahsi.py 项目： jevans97utk/SlenderNodes

    def test_retrieve_record(self):
        """
        SCENARIO:  We have a URL for a landing page for a PUBLISHED document.

        EXPECTED RESULT:  The series identifier is retrieved.  The lastMod
        time is None because this is only retrieved in schema.org.
        """
        url = ('https://www.hydroshare.org'
               '/resource/81e947faccf04de59392dddaac77bc75/')

        # External I/O
        #
        # 1st:  landing page
        # 2nd:  zip archive containing data and metadata
        landing_package = 'tests.data.cuahsi.81e947faccf04de59392dddaac77bc75'
        contents1 = ir.read_text(landing_package, 'landing_page.html')

        # The metadata document lives in a separate "data" sub-package, so it
        # gets its own name rather than rebinding the landing page's package.
        data_package = landing_package + '.data'
        metadata = ir.read_binary(data_package, 'resourcemetadata.xml')

        # Build the zip archive in memory.  The context manager closes the
        # archive even if writestr raises, and the archive must be closed
        # before the bytes are read so that the zip trailer is written.
        buffer = io.BytesIO()
        with zipfile.ZipFile(buffer, mode='w') as zf:
            zf.writestr(
                '81e947faccf04de59392dddaac77bc75/data/resourcemetadata',
                metadata
            )
        contents2 = buffer.getvalue()

        harvester = CUAHSIHarvester()

        with self.assertLogs(logger=harvester.logger, level='INFO'):
            with aioresponses() as m:
                m.get(self.regex, body=contents1)
                m.get(self.regex, body=contents2)

                awaitable = harvester.retrieve_record(url)
                # Only the series identifier and lastMod time are checked
                # here; the other two returned values are not used.
                sid, _pid, lastmod, _doc = asyncio.run(awaitable)

        self.assertEqual(sid, '10.4211/hs.81e947faccf04de59392dddaac77bc75')
        self.assertIsNone(lastmod)