def test_no_wellformed():
    """Test that the scraper is skipped without the well-formed check.

    The scraper is created with ``False`` as its second argument. It is
    expected to report a "Skipping scraper" message and to leave
    ``well_formed`` as ``None``.
    """
    xml_path = "tests/data/text_xml/valid_1.0_wellformed.xml"
    scraper = SchematronScraper(xml_path, False)
    scraper.scrape_file()

    reported = scraper.messages()
    assert partial_message_included("Skipping scraper", reported)
    assert scraper.well_formed is None
def test_xslt_filename():
    """Test that the checksum in the XSLT filename is calculated properly.

    Each step changes one scraper setting (schematron file, verbosity or
    extra hash) and checks that the generated filename contains the
    expected checksum fragment for the accumulated settings.
    """
    # pylint: disable=protected-access
    scraper = SchematronScraper("filename", "text/xml")

    # (attribute to change, new value, fragment expected in the filename)
    steps = [
        ("_schematron_file",
         "tests/data/text_xml/supplementary/local.sch",
         "76ed62"),
        ("_verbose", True, "ddb11a"),
        ("_extra_hash", "abc", "550d66"),
        ("_verbose", False, "791b2e"),
    ]
    for attribute, value, fragment in steps:
        setattr(scraper, attribute, value)
        assert fragment in scraper._generate_xslt_filename()
def test_forced_filetype(result_dict, filetype, evaluate_scraper):
    """
    Test using user-supplied MIME-types and versions.

    :result_dict: Result dict passed on to force_correct_filetype
    :filetype: Dict holding the given MIME type and version; the correct
               MIME type is added to it before use
    :evaluate_scraper: Callable comparing the scraper to expected results
    """
    mimetype_key = six.text_type("correct_mimetype")
    filetype[mimetype_key] = "text/xml"

    correct = force_correct_filetype(
        "valid_1.0_well_formed.xml", result_dict, filetype, ["(:unav)"])

    # The given values are read only after the expected results are built.
    given_mimetype = filetype["given_mimetype"]
    given_version = filetype["given_version"]
    schematron_path = os.path.join(ROOTPATH, "tests/data/text_xml/local.sch")
    params = dict(
        mimetype=given_mimetype,
        version=given_version,
        schematron=schematron_path,
    )

    scraper = SchematronScraper(correct.filename, True, params)
    scraper.scrape_file()
    evaluate_scraper(scraper, correct)
def test_scraper(filename, result_dict, params, evaluate_scraper):
    """
    Test scraper against expected results.

    With the verbose parameter set, the messages must not mention
    suppressed output; otherwise any non-empty messages must mention it.

    :filename: Test file name
    :result_dict: Result dict containing test purpose, and parts of
                  expected results of stdout and stderr
    :params: schematron file as extra parameter
    :evaluate_scraper: Callable comparing the scraper to expected results
    """
    suppressed_note = "have been suppressed"

    correct = parse_results(filename, "text/xml", result_dict, True, params)
    scraper = SchematronScraper(
        filename=correct.filename,
        mimetype="text/xml",
        params=correct.params,
    )
    scraper.scrape_file()
    evaluate_scraper(scraper, correct)

    options = correct.params
    verbose_requested = "verbose" in options and options["verbose"]
    if verbose_requested:
        assert not partial_message_included(suppressed_note,
                                            scraper.messages())
    elif scraper.messages():
        assert partial_message_included(suppressed_note, scraper.messages())
def test_parameters():
    """Test that default and user-supplied parameter values are stored.

    Without params the schematron file and extra hash are ``None``,
    verbose is off and cache is on. Values supplied in params replace
    each of these.
    """
    # pylint: disable=protected-access
    default_scraper = SchematronScraper("testsfile", "test/mimetype")
    assert default_scraper._schematron_file is None
    assert default_scraper._extra_hash is None
    assert not default_scraper._verbose
    assert default_scraper._cache

    custom_params = {
        "schematron": "schfile",
        "extra_hash": "abc",
        "verbose": True,
        "cache": False,
    }
    custom_scraper = SchematronScraper("testfile", "text/xml",
                                       params=custom_params)
    assert custom_scraper._schematron_file == "schfile"
    assert custom_scraper._extra_hash == "abc"
    assert custom_scraper._verbose
    assert not custom_scraper._cache
def test_filter_duplicate_elements():
    """Test duplicate element filtering.

    Repeated active-pattern and fired-rule elements are expected to appear
    only once in the result, while the two failed-assert elements with
    different test attributes are both expected to remain.
    """
    # pylint: disable=protected-access
    # Adjacent literals are joined into one bytes value; every fragment but
    # the last ends with the single space separating the elements.
    svrl_output = (
        b'<svrl:schematron-output xmlns:svrl="http://purl.oclc.org/dsdl/svrl"> '
        b'<svrl:active-pattern id="id"/> '
        b'<svrl:active-pattern id="id"/> '
        b'<svrl:fired-rule context="context"/> '
        b'<svrl:fired-rule context="context"/> '
        b'<svrl:failed-assert test="test"> '
        b'<svrl:text>string</svrl:text> '
        b'</svrl:failed-assert> '
        b'<svrl:failed-assert test="test 2"> '
        b'<svrl:text>string</svrl:text> '
        b'</svrl:failed-assert> '
        b'<svrl:fired-rule context="context"/> '
        b'<svrl:active-pattern id="id"/> '
        b'</svrl:schematron-output>'
    )

    scraper = SchematronScraper("filename", "text/xml")
    filtered = scraper._filter_duplicate_elements(svrl_output)

    # (opening tag prefix, number of occurrences expected after filtering)
    expected_counts = [
        (b"<svrl:active-pattern", 1),
        (b"<svrl:fired-rule", 1),
        (b"<svrl:failed-assert", 2),
    ]
    for tag_prefix, occurrences in expected_counts:
        assert filtered.count(tag_prefix) == occurrences
def test_scraper(filename, result_dict, params, evaluate_scraper):
    """
    Test scraper against expected results.

    The expected version is cleared and the first stream's version and
    MIME type are set to "(:unav)" before the comparison. With the verbose
    parameter set, the messages must not mention suppressed output;
    otherwise any non-empty messages must mention it.

    :filename: Test file name
    :result_dict: Result dict passed on to parse_results
    :params: Extra parameters passed on to parse_results
    :evaluate_scraper: Callable comparing the scraper to expected results
    """
    suppressed_note = "have been suppressed"

    correct = parse_results(filename, "text/xml", result_dict, True, params)
    scraper = SchematronScraper(correct.filename, True, correct.params)
    scraper.scrape_file()

    # Adjust the expected results only after the file has been scraped.
    correct.version = None
    for stream_key in ("version", "mimetype"):
        correct.streams[0][stream_key] = "(:unav)"
    evaluate_scraper(scraper, correct)

    options = correct.params
    verbose_requested = "verbose" in options and options["verbose"]
    if verbose_requested:
        assert not partial_message_included(suppressed_note,
                                            scraper.messages())
    elif scraper.messages():
        assert partial_message_included(suppressed_note, scraper.messages())
def test_is_supported():
    """Test is_supported method.

    Each case pairs the expected truthiness of the result with the
    positional arguments given to ``SchematronScraper.is_supported``.
    """
    mime = "text/xml"
    ver = "1.0"

    # A fresh params dict is created for every case that passes one.
    cases = [
        (True, (mime, ver, True, {"schematron": None})),
        (False, (mime, ver, True)),
        (True, (mime, None, True, {"schematron": None})),
        (False, (mime, ver, False, {"schematron": None})),
        (True, (mime, "foo", True, {"schematron": None})),
        (False, ("foo", ver, True, {"schematron": None})),
    ]
    for expected, call_args in cases:
        assert bool(SchematronScraper.is_supported(*call_args)) is expected
def main(arguments=None):
    """Check one XML file against a schematron schema from the command line.

    Scraper messages are printed to stdout and scraper errors to stderr.

    :arguments: Command line arguments handed to the option parser
    :returns: 117 if the scraper reported errors or its well_formed value
              is falsy, 0 otherwise
    """
    parser = optparse.OptionParser(
        usage="usage: %prog [options] xml-file-path")
    parser.add_option(
        "-s", "--schemapath",
        dest="schemapath",
        help="Path to schematron schemas",
        metavar="PATH")
    options, positional = parser.parse_args(arguments)

    # Exactly one file path and the schema path are both mandatory.
    if len(positional) != 1:
        parser.error("Must give a path to an XML file as argument")
    if options.schemapath is None:
        parser.error("The -s switch is required")

    target = positional[0]
    # A directory argument stands for the mets.xml file inside it.
    if os.path.isdir(target):
        target = os.path.join(target, 'mets.xml')

    scraper = SchematronScraper(
        target,
        mimetype="text/xml",
        params={"schematron": options.schemapath})
    scraper.scrape_file()

    message_string = ensure_text(concat(scraper.messages()).strip())
    error_string = ensure_text(concat(scraper.errors()).strip())

    if message_string:
        print(message_string)
    if error_string:
        print(error_string, file=sys.stderr)

    failed = bool(error_string) or not scraper.well_formed
    return 117 if failed else 0