Пример #1
0
def test_no_wellformed():
    """Test that the scraper is skipped when well-formed check is off.

    The scraper is created with ``False`` as its second argument. After
    scraping, the messages must mention that the scraper was skipped and
    ``well_formed`` must remain ``None``.
    """
    xml_path = "tests/data/text_xml/valid_1.0_wellformed.xml"
    scraper = SchematronScraper(xml_path, False)
    scraper.scrape_file()

    logged = scraper.messages()
    assert partial_message_included("Skipping scraper", logged)
    assert scraper.well_formed is None
def test_xslt_filename():
    """Test the checksum contained in the generated XSLT filename.

    The private scraper attributes are changed one at a time, and after
    each change the generated filename must contain the expected checksum
    fragment.
    """
    # pylint: disable=protected-access
    scraper = SchematronScraper("filename", "text/xml")

    # The steps are cumulative: each change is applied on top of the
    # previous ones, so their order must not be altered.
    steps = (
        ("_schematron_file",
         "tests/data/text_xml/supplementary/local.sch",
         "76ed62"),
        ("_verbose", True, "ddb11a"),
        ("_extra_hash", "abc", "550d66"),
        ("_verbose", False, "791b2e"),
    )
    for attribute, value, checksum_part in steps:
        setattr(scraper, attribute, value)
        assert checksum_part in scraper._generate_xslt_filename()
Пример #3
0
def test_forced_filetype(result_dict, filetype, evaluate_scraper):
    """
    Test scraping with a user-supplied MIME type and version.

    :result_dict: Result dict passed on to force_correct_filetype
    :filetype: Dict holding the given MIME type and version; the correct
               MIME type is set to "text/xml" here
    :evaluate_scraper: Callable comparing the scraper to the expected
                       results
    """
    filetype[six.text_type("correct_mimetype")] = "text/xml"
    correct = force_correct_filetype(
        "valid_1.0_well_formed.xml", result_dict, filetype, ["(:unav)"])

    scraper = SchematronScraper(
        correct.filename,
        True,
        {
            "mimetype": filetype["given_mimetype"],
            "version": filetype["given_version"],
            "schematron": os.path.join(ROOTPATH,
                                       "tests/data/text_xml/local.sch"),
        })
    scraper.scrape_file()

    evaluate_scraper(scraper, correct)
def test_scraper(filename, result_dict, params, evaluate_scraper):
    """
    Test the scraper against the expected results.

    :filename: Test file name
    :result_dict: Result dict containing test purpose, and parts of
                  expected results of stdout and stderr
    :params: schematron file as extra parameter
    :evaluate_scraper: Callable comparing the scraper to the expected
                       results
    """
    correct = parse_results(filename, "text/xml", result_dict, True, params)

    scraper = SchematronScraper(
        filename=correct.filename,
        mimetype="text/xml",
        params=correct.params,
    )
    scraper.scrape_file()
    evaluate_scraper(scraper, correct)

    # In verbose mode the suppression note must be absent; otherwise it
    # must be present whenever the scraper produced any messages.
    suppression_note = "have been suppressed"
    verbose = "verbose" in correct.params and correct.params["verbose"]
    if verbose:
        assert not partial_message_included(suppression_note,
                                            scraper.messages())
    elif scraper.messages():
        assert partial_message_included(suppression_note,
                                        scraper.messages())
def test_parameters():
    """Test the default values and the handling of given parameters."""
    # pylint: disable=protected-access

    # Without params, the private attributes must hold their defaults.
    default_scraper = SchematronScraper("testsfile", "test/mimetype")
    assert default_scraper._schematron_file is None
    assert default_scraper._extra_hash is None
    assert not default_scraper._verbose
    assert default_scraper._cache

    # Every given parameter must end up in the matching attribute.
    overrides = {
        "schematron": "schfile",
        "extra_hash": "abc",
        "verbose": True,
        "cache": False,
    }
    custom_scraper = SchematronScraper("testfile", "text/xml",
                                       params=overrides)
    assert custom_scraper._schematron_file == "schfile"
    assert custom_scraper._extra_hash == "abc"
    assert custom_scraper._verbose
    assert not custom_scraper._cache
def test_filter_duplicate_elements():
    """Test that duplicate SVRL elements are filtered from the output.

    The input contains three identical active-pattern elements, three
    identical fired-rule elements and two failed-assert elements with
    different ``test`` attributes. Only one active-pattern and one
    fired-rule may remain, while both failed-asserts must be kept.
    """
    # pylint: disable=protected-access
    svrl_report = b"""<svrl:schematron-output
            xmlns:svrl="http://purl.oclc.org/dsdl/svrl">
               <svrl:active-pattern id="id"/>
               <svrl:active-pattern id="id"/>
               <svrl:fired-rule context="context"/>
               <svrl:fired-rule context="context"/>
               <svrl:failed-assert test="test">
                   <svrl:text>string</svrl:text>
               </svrl:failed-assert>
               <svrl:failed-assert test="test 2">
                   <svrl:text>string</svrl:text>
               </svrl:failed-assert>
               <svrl:fired-rule context="context"/>
               <svrl:active-pattern id="id"/>
           </svrl:schematron-output>"""

    scraper = SchematronScraper("filename", "text/xml")
    filtered = scraper._filter_duplicate_elements(svrl_report)

    expected_counts = (
        (b"<svrl:active-pattern", 1),
        (b"<svrl:fired-rule", 1),
        (b"<svrl:failed-assert", 2),
    )
    for opening_tag, expected in expected_counts:
        assert filtered.count(opening_tag) == expected
Пример #7
0
def test_scraper(filename, result_dict, params, evaluate_scraper):
    """
    Test the scraper against the expected results.

    The expected version is cleared, and the version and MIME type of the
    first expected stream are set to "(:unav)" before the comparison.

    :filename: Test file name
    :result_dict: Result dict passed on to parse_results
    :params: Extra parameters passed on to parse_results
    :evaluate_scraper: Callable comparing the scraper to the expected
                       results
    """
    correct = parse_results(filename, "text/xml", result_dict, True, params)

    scraper = SchematronScraper(correct.filename, True, correct.params)
    scraper.scrape_file()

    correct.version = None
    first_stream = correct.streams[0]
    first_stream["version"] = "(:unav)"
    first_stream["mimetype"] = "(:unav)"

    evaluate_scraper(scraper, correct)

    # In verbose mode the suppression note must be absent; otherwise it
    # must be present whenever the scraper produced any messages.
    suppression_note = "have been suppressed"
    verbose = "verbose" in correct.params and correct.params["verbose"]
    if verbose:
        assert not partial_message_included(suppression_note,
                                            scraper.messages())
    elif scraper.messages():
        assert partial_message_included(suppression_note,
                                        scraper.messages())
def test_is_supported():
    """Test the is_supported method with various argument combinations.

    Each case pairs the positional arguments given to ``is_supported``
    with the truthiness expected from the call.
    """
    mime = "text/xml"
    ver = "1.0"

    cases = (
        ((mime, ver, True, {"schematron": None}), True),
        ((mime, ver, True), False),
        ((mime, None, True, {"schematron": None}), True),
        ((mime, ver, False, {"schematron": None}), False),
        ((mime, "foo", True, {"schematron": None}), True),
        (("foo", ver, True, {"schematron": None}), False),
    )
    for call_args, expected in cases:
        supported = SchematronScraper.is_supported(*call_args)
        assert bool(supported) == expected
Пример #9
0
def main(arguments=None):
    """Check an XML file against Schematron schemas from the command line.

    Exactly one positional argument (path to an XML file, or to a
    directory containing ``mets.xml``) and the ``-s`` option are required;
    otherwise the option parser reports a usage error and exits.

    Scraper messages are printed to stdout and errors to stderr.

    :arguments: List of command line arguments, passed as such to
                ``OptionParser.parse_args``
    :returns: 117 if there were errors or the file was not found
              well-formed, 0 otherwise
    """
    parser = optparse.OptionParser(
        usage="usage: %prog [options] xml-file-path")
    parser.add_option(
        "-s", "--schemapath",
        dest="schemapath",
        help="Path to schematron schemas",
        metavar="PATH",
    )

    options, positional = parser.parse_args(arguments)

    # parser.error() prints the usage message and exits the process.
    if len(positional) != 1:
        parser.error("Must give a path to an XML file as argument")
    if options.schemapath is None:
        parser.error("The -s switch is required")

    target = positional[0]
    if os.path.isdir(target):
        # For a directory, check the mets.xml file inside it.
        target = os.path.join(target, "mets.xml")

    scraper = SchematronScraper(
        target,
        mimetype="text/xml",
        params={"schematron": options.schemapath},
    )
    scraper.scrape_file()

    message_string = ensure_text(concat(scraper.messages()).strip())
    error_string = ensure_text(concat(scraper.errors()).strip())

    if message_string:
        print(message_string)
    if error_string:
        print(error_string, file=sys.stderr)

    failed = error_string or not scraper.well_formed
    return 117 if failed else 0