def test_scraper_tif(filename, result_dict, evaluate_scraper):
    """
    Test scraper with tiff files.

    :filename: Test file name
    :result_dict: Result dict containing the test purpose, parts of
                  expected results of stdout and stderr, and expected streams
    :evaluate_scraper: Callable comparing the scraper against the expected
                       results; only called for well-formed files
    """
    correct = parse_results(filename, "image/tiff",
                            result_dict, True)
    if correct.well_formed:
        correct.stdout_part = VALID_MSG
        correct.stderr_part = ""
    else:
        correct.stdout_part = ""
        correct.stderr_part = INVALID_MSG
    scraper = PilScraper(filename=correct.filename, mimetype="image/tiff")
    scraper.scrape_file()

    if correct.well_formed:
        # Every expected stream gets UNAV as its version. Walk the stream
        # dicts themselves rather than positional indices, so the result does
        # not depend on the stream keys being exactly 0..n-1.
        for stream in correct.streams.values():
            stream["version"] = UNAV
        evaluate_scraper(scraper, correct)
    else:
        # For a file that is not well-formed only the messages and the
        # absence of streams are checked.
        assert not scraper.well_formed
        assert partial_message_included(correct.stdout_part,
                                        scraper.messages())
        assert partial_message_included(correct.stderr_part,
                                        scraper.errors())
        assert not scraper.streams
def test_scraper_gif(filename, result_dict, evaluate_scraper):
    """
    Run PilScraper on a gif file and check the outcome.

    :filename: Test file name
    :result_dict: Result dict containing the test purpose, parts of
                  expected results of stdout and stderr, and expected streams
    :evaluate_scraper: Callable comparing the scraper against the expected
                       results; only called for well-formed files
    """
    expected = parse_results(filename, "image/gif", result_dict, True)

    if expected.well_formed:
        # GIF is an index image
        expected.streams[0]["samples_per_pixel"] = "1"
        expected.stdout_part, expected.stderr_part = VALID_MSG, ""
    else:
        expected.stdout_part, expected.stderr_part = "", INVALID_MSG

    # The expected version of every stream is UNAV.
    for stream_key in expected.streams:
        expected.streams[stream_key]["version"] = UNAV

    pil_scraper = PilScraper(filename=expected.filename,
                             mimetype="image/gif")
    pil_scraper.scrape_file()

    if expected.well_formed:
        evaluate_scraper(pil_scraper, expected)
        return

    # Not well-formed: check the messages and that no streams were produced.
    assert not pil_scraper.well_formed
    assert partial_message_included(expected.stdout_part,
                                    pil_scraper.messages())
    assert partial_message_included(expected.stderr_part,
                                    pil_scraper.errors())
    assert not pil_scraper.streams
def test_scraper(filename, result_dict, header, extra_params,
                 evaluate_scraper):
    """
    Scrape a csv file with CsvScraper and evaluate the result.

    :filename: Test file name
    :result_dict: Result dict containing purpose of the test, parts of
                  expected stdout and stderr, and expected streams
    :header: CSV header line, passed to the scraper as "fields"
    :extra_params: Extra parameters for the scraper (e.g. charset); these
                   override the default parameters built here
    :evaluate_scraper: Callable comparing the scraper against the expected
                       results
    """
    expected = parse_results(filename, "text/csv", result_dict, True)
    first_stream = expected.streams[0]

    # Separator and delimiter come from the first expected stream.
    scraper_params = dict(
        separator=first_stream["separator"],
        delimiter=first_stream["delimiter"],
        fields=header,
        mimetype=MIMETYPE,
    )
    scraper_params.update(extra_params)

    csv_scraper = CsvScraper(filename=expected.filename,
                             mimetype=MIMETYPE,
                             params=scraper_params)
    csv_scraper.scrape_file()

    evaluate_scraper(csv_scraper, expected)
def test_scraper(filename, result_dict, params, evaluate_scraper):
    """
    Scrape an XML file with SchematronScraper and evaluate the result.

    In addition to the common evaluation, the scraper messages are checked
    for the "have been suppressed" note: it must be absent when the
    "verbose" parameter is set, and present otherwise whenever the scraper
    produced any messages.

    :filename: Test file name
    :result_dict: Result dict containing test purpose, and parts of
                  expected results of stdout and stderr
    :params: schematron file as extra parameter
    :evaluate_scraper: Callable comparing the scraper against the expected
                       results
    """
    expected = parse_results(filename, "text/xml",
                             result_dict, True, params)
    schematron = SchematronScraper(filename=expected.filename,
                                   mimetype="text/xml",
                                   params=expected.params)
    schematron.scrape_file()

    evaluate_scraper(schematron, expected)

    suppression_note = "have been suppressed"
    if expected.params.get("verbose"):
        assert not partial_message_included(suppression_note,
                                            schematron.messages())
    elif schematron.messages():
        assert partial_message_included(suppression_note,
                                        schematron.messages())
def test_encoding_check(filename, charset, is_wellformed, evaluate_scraper):
    """
    Check character encoding validation of TextEncodingScraper.

    :filename: Test file name
    :charset: Character encoding given to the scraper as a parameter
    :is_wellformed: Expected result of well-formedness
    :evaluate_scraper: Callable comparing the scraper against the expected
                       results; only called when the file is expected to be
                       well-formed
    """
    scraper_params = {"charset": charset}
    expected = parse_results(filename, "text/plain", {}, True,
                             scraper_params)
    encoding_scraper = TextEncodingScraper(filename=expected.filename,
                                           mimetype="text/plain",
                                           params=scraper_params)
    encoding_scraper.scrape_file()

    # Pick the expected metadata according to the expected outcome.
    if is_wellformed:
        mimetype, version, stream_type = "text/plain", UNAP, "text"
    else:
        mimetype = version = stream_type = UNAV
    expected.update_mimetype(mimetype)
    expected.update_version(version)
    expected.streams[0]["stream_type"] = stream_type

    expected.well_formed = is_wellformed
    if not expected.well_formed:
        assert partial_message_included("decoding error",
                                        encoding_scraper.errors())
        assert encoding_scraper.errors()
        assert not encoding_scraper.well_formed
        return

    expected.stdout_part = "encoding validated successfully"
    expected.stderr_part = ""
    evaluate_scraper(encoding_scraper, expected)
Пример #6
0
def test_scraper(filename, result_dict, evaluate_scraper):
    """
    Scrape a file with DpxScraper and evaluate the result.

    :filename: Test file name
    :result_dict: Result dict handed to parse_results to build the
                  expected results
    :evaluate_scraper: Callable comparing the scraper against the expected
                       results
    """
    expected = parse_results(filename, MIMETYPE, result_dict, True)

    dpx_scraper = DpxScraper(expected.filename, True, expected.params)
    dpx_scraper.scrape_file()

    evaluate_scraper(dpx_scraper, expected)
Пример #7
0
def test_mixed_filetype(filename, result_dict, filetype, evaluate_scraper):
    """
    Scrape a file while forcing a wrong but supported file type.

    Some metadata models support many file types. OfficeMagicMeta, for
    instance, supports text, spreadsheet and presentation in both MS and
    open formats, among other file types. As a side effect it is entirely
    possible to scrape e.g. an ods file as a doc (or xls) file by just
    forcing the file type the scraper uses; this produces no errors and the
    file is reported as well-formed.

    Currently this causes no problems if the user is aware of this
    functionality, as no metadata scraping results are affected by it. This
    test can hopefully catch if problematic metadata functions are
    introduced in the future.

    :filename: Test file name
    :result_dict: Result dict handed to parse_results to build the
                  expected results
    :filetype: Dict with the keys "correct_mimetype", "expected_mimetype",
               "expected_version", "given_mimetype" and "given_version"
    :evaluate_scraper: Callable comparing the scraper against the expected
                       results
    """
    expected = parse_results(filename, filetype["correct_mimetype"],
                             result_dict, True)
    expected.update_mimetype(filetype["expected_mimetype"])
    expected.update_version(filetype["expected_version"])

    forced_mimetype = filetype["given_mimetype"]
    scraper_params = {
        "mimetype": forced_mimetype,
        "version": filetype["given_version"],
        # Fall back to the correct MIME type when none is forced.
        "mimetype_guess": forced_mimetype or filetype["correct_mimetype"],
    }

    magic_scraper = MagicScraper(expected.filename, True, scraper_params)
    magic_scraper.scrape_file()

    evaluate_scraper(magic_scraper, expected)
Пример #8
0
def test_scraper_invalid(filename, result_dict, params, evaluate_scraper):
    """
    Run XmllintScraper on invalid files.

    :filename: Test file name
    :result_dict: Result dict containing test purpose, and parts of
                  expected results of stdout and stderr
    :params: Extra parameters for Scraper
    :evaluate_scraper: Callable comparing the scraper against the expected
                       results; only called when the file is still expected
                       to be well-formed
    """
    expected = parse_results(filename, "text/xml", result_dict, True, params)
    xmllint = XmllintScraper(filename=expected.filename,
                             mimetype="text/xml",
                             params=expected.params)
    xmllint.scrape_file()

    # Files whose name contains one of these markers are expected to be
    # not well-formed and to have no version.
    not_well_formed_markers = ("empty", "no_closing_tag",
                               "no_namespace_catalog", "diacritics")
    for marker in not_well_formed_markers:
        if marker in filename:
            expected.well_formed = False
            expected.version = None
            expected.streams[0]["version"] = None
            break

    if expected.well_formed:
        evaluate_scraper(xmllint, expected)
    else:
        assert not xmllint.well_formed
        assert not xmllint.streams
        assert partial_message_included(expected.stdout_part,
                                        xmllint.messages())
        assert partial_message_included(expected.stderr_part,
                                        xmllint.errors())

    # Checked for every file, well-formed or not.
    assert not partial_message_included("<note>", xmllint.messages())
Пример #9
0
def test_invalid_text(filename, mimetype):
    """
    Run MagicScraper on invalid text files.

    The scraper must report the file as not well-formed, produce no
    streams and log an "Unsupported MIME type" error.

    :filename: Test file name
    :mimetype: MIME type used for the expected results and as the
               scraper's "mimetype_guess" parameter
    """
    expected = parse_results(
        filename, mimetype,
        {
            "purpose": "Test invalid file.",
            "stdout_part": "",
            "stderr_part": "Unsupported MIME type"
        },
        True)
    scraper_params = expected.params
    scraper_params["mimetype_guess"] = expected.mimetype
    magic_scraper = MagicScraper(expected.filename, True, scraper_params)
    magic_scraper.scrape_file()

    # Adjust the expected metadata: empty files get a MIME type of their
    # own, all other files the generic binary type.
    if "empty" in filename:
        fallback_mimetype = "inode/x-empty"
    else:
        fallback_mimetype = "application/octet-stream"
    first_stream = expected.streams[0]
    first_stream["mimetype"] = fallback_mimetype
    expected.mimetype = fallback_mimetype

    expected.version = None
    first_stream["version"] = None
    first_stream["charset"] = None

    assert not magic_scraper.well_formed
    assert not magic_scraper.streams
    assert partial_message_included(expected.stdout_part,
                                    magic_scraper.messages())
    assert partial_message_included(expected.stderr_part,
                                    magic_scraper.errors())
Пример #10
0
def test_scraper_valid(filename, mimetype, evaluate_scraper):
    """
    Run MagicScraper on valid files and evaluate the result.

    The expected results are adjusted per MIME type before the evaluation.
    The adjustments are applied in order, so a later rule overrides an
    earlier one for the same field.

    :filename: Test file name
    :mimetype: MIME type used for the expected results and as the
               scraper's "mimetype_guess" parameter
    :evaluate_scraper: Callable comparing the scraper against the expected
                       results
    """
    expected = parse_results(
        filename, mimetype,
        {
            "purpose": "Test valid file.",
            "stdout_part": "successfully",
            "stderr_part": ""
        },
        True)
    scraper_params = expected.params
    scraper_params["mimetype_guess"] = expected.mimetype
    magic_scraper = MagicScraper(expected.filename, True, scraper_params)
    magic_scraper.scrape_file()

    first_stream = expected.streams[0]
    if expected.mimetype == "application/xhtml+xml":
        first_stream["stream_type"] = "text"
    if (OfficeFileMagicMeta.is_supported(expected.mimetype)
            or HtmlFileMagicMeta.is_supported(expected.mimetype)):
        expected.version = None
        first_stream["version"] = None
    if expected.mimetype in ("text/plain", "text/csv"):
        first_stream["charset"] = "UTF-8"
        first_stream["version"] = "(:unap)"
    if filename == "valid__iso8859.txt":
        first_stream["charset"] = "ISO-8859-15"
    if (mimetype == "text/html"
            or any(part in mimetype for part in ("vnd.", "msword"))):
        first_stream["version"] = "(:unav)"
    if mimetype == "image/jp2":
        first_stream["version"] = ""

    evaluate_scraper(magic_scraper, expected)
Пример #11
0
def test_scraper_pdf(filename, result_dict, evaluate_scraper):
    """
    Test Ghostscript scraper.

    :filename: Test filename template. Character X is replaced with versions
               1.7, A-1a, A-2b, and A-3b. All of these files must be found.
    :result_dict: Result dict containing the test purpose, and parts of
                  expected results of stdout and stderr
    :evaluate_scraper: Callable comparing the scraper against the expected
                       results
    """
    for ver in ["1.7", "A-1a", "A-2b", "A-3b"]:
        # Build the name from the untouched template on every round.
        # Rebinding ``filename`` itself would consume the X placeholder on
        # the first round, and the remaining versions would never be scraped.
        versioned_filename = filename.replace("X", ver)
        correct = parse_results(versioned_filename, "application/pdf",
                                result_dict, True)
        scraper = GhostscriptScraper(filename=correct.filename,
                                     mimetype="application/pdf")
        scraper.scrape_file()

        # Ghostscript cannot handle version or MIME type
        correct.streams[0]["version"] = UNAV
        correct.streams[0]["mimetype"] = UNAV

        # Output is checked separately below, hence eval_output=False.
        evaluate_scraper(scraper, correct, eval_output=False)

        if scraper.well_formed:
            assert not partial_message_included("Error", scraper.messages())
            assert not scraper.errors()
        else:
            assert partial_message_included(correct.stderr_part,
                                            scraper.errors())
            assert partial_message_included(correct.stdout_part,
                                            scraper.messages())
Пример #12
0
def test_scraper_gif(filename, result_dict, evaluate_scraper):
    """
    Run PilScraper on a gif file and check the outcome.

    :filename: Test file name
    :result_dict: Result dict handed to parse_results to build the
                  expected results
    :evaluate_scraper: Callable comparing the scraper against the expected
                       results; only called for well-formed files
    """
    expected = parse_results(filename, "image/gif", result_dict, True)

    if expected.well_formed:
        # GIF is an index image
        expected.streams[0]["samples_per_pixel"] = "1"
        expected.stdout_part, expected.stderr_part = VALID_MSG, ""
    else:
        expected.stdout_part, expected.stderr_part = "", INVALID_MSG

    # The expected version of every stream is "(:unav)".
    for stream_key in expected.streams:
        expected.streams[stream_key]["version"] = "(:unav)"

    pil_scraper = PilScraper(expected.filename, True, expected.params)
    pil_scraper.scrape_file()

    if expected.well_formed:
        evaluate_scraper(pil_scraper, expected)
        return

    # Not well-formed: check the messages and that no streams were produced.
    assert not pil_scraper.well_formed
    assert partial_message_included(expected.stdout_part,
                                    pil_scraper.messages())
    assert partial_message_included(expected.stderr_part,
                                    pil_scraper.errors())
    assert not pil_scraper.streams
Пример #13
0
def test_invalid_markdown_pdf_arc(filename, mimetype, class_,
                                  evaluate_scraper):
    """
    Test scrapers for invalid XML, XHTML, HTML, pdf and arc files.

    Note that the expected result is forced to well-formed before the
    evaluation, so the given scraper class is expected to accept these
    files.

    :filename: Test file name
    :mimetype: MIME type used for the expected results
    :class_: Scraper class to instantiate and run
    :evaluate_scraper: Callable comparing the scraper against the expected
                       results
    """
    expected = parse_results(
        filename, mimetype,
        {
            'purpose': 'Test invalid file.',
            'stdout_part': 'successfully',
            'stderr_part': ''
        },
        True)
    scraper = class_(expected.filename, expected.mimetype, True,
                     expected.params)
    scraper.scrape_file()

    expected.well_formed = True
    first_stream = expected.streams[0]

    if 'empty' in filename:
        first_stream['mimetype'] = 'inode/x-empty'

    # Class-specific adjustments of the expected metadata.
    if class_ == HtmlFileMagic:
        expected.version = None
        first_stream['version'] = None
    if class_ == XhtmlFileMagic:
        first_stream['stream_type'] = 'text'
    if class_ in (HtmlFileMagic, XmlFileMagic, XhtmlFileMagic):
        first_stream['charset'] = 'UTF-8'

    evaluate_scraper(scraper, expected)
def test_existing_files(filename, mimetype, is_textfile, evaluate_scraper):
    """
    Check whether TextfileScraper detects a file as a text file.

    The scraper tool is not able to detect UTF-16 files without BOM or
    UTF-32 files.

    :filename: Test file name
    :mimetype: File MIME type
    :is_textfile: Expected result whether a file is a text file or not
    :evaluate_scraper: Callable comparing the scraper against the expected
                       results; only called when the file is expected to be
                       a text file
    """
    expected = parse_results(filename, mimetype, {}, True)
    textfile_scraper = TextfileScraper(filename=expected.filename,
                                       mimetype="text/plain")
    textfile_scraper.scrape_file()

    # Pick the expected metadata according to the expected outcome.
    if is_textfile:
        stream_type, new_mimetype, version = "text", "text/plain", UNAP
    else:
        stream_type = new_mimetype = version = UNAV
    first_stream = expected.streams[0]
    first_stream["stream_type"] = stream_type
    expected.update_mimetype(new_mimetype)
    first_stream["version"] = version

    expected.well_formed = is_textfile
    if not expected.well_formed:
        assert partial_message_included(INVALID_MSG,
                                        textfile_scraper.errors())
        assert textfile_scraper.errors()
        assert not textfile_scraper.well_formed
        return

    expected.stdout_part = VALID_MSG
    expected.stderr_part = ""
    evaluate_scraper(textfile_scraper, expected)
Пример #15
0
def test_no_parameters(testpath, evaluate_scraper):
    """
    Scrape a csv file with CsvScraper without separate parameters.

    VALID_CSV is written to "valid__.csv" under the test path and scraped
    with the scraper's default settings.

    :testpath: Directory where the test file is written
    :evaluate_scraper: Callable comparing the scraper against the expected
                       results
    """
    csv_path = os.path.join(testpath, 'valid__.csv')
    with open(csv_path, 'wb') as outfile:
        outfile.write(VALID_CSV)

    csv_scraper = CsvScraper(csv_path)
    csv_scraper.scrape_file()

    expected_stream = {
        'stream_type': 'text',
        'index': 0,
        'mimetype': MIMETYPE,
        'version': '',
        'delimiter': ',',
        'separator': '\r\n',
        'first_line': ['1997', 'Ford', 'E350', 'ac, abs, moon', '3000.00']
    }
    expected = parse_results(
        'valid__.csv', MIMETYPE,
        {
            'purpose': 'Test valid file on default settings.',
            'stdout_part': 'successfully',
            'stderr_part': '',
            'streams': {0: expected_stream}
        },
        True)
    # The version is overridden only after the results have been parsed.
    expected.streams[0]['version'] = "(:unap)"

    evaluate_scraper(csv_scraper, expected)
Пример #16
0
def test_jpeg2000_inside_pdf(evaluate_scraper):
    """
    Scrape a pdf file containing a JPEG2000 image with Ghostscript.

    Default Ghostscript installation on CentOS 7 does not support pdf files
    containing JPXDecode data. This test verifies that the installed version
    is recent enough.

    :evaluate_scraper: Callable comparing the scraper against the expected
                       results
    """
    expected = parse_results(
        "valid_1.7_jpeg2000.pdf",
        "application/pdf",
        {
            "purpose": "Test pdf with JPEG2000 inside it.",
            "stdout_part": "Well-formed and valid",
            "stderr_part": ""
        },
        True)

    ghostscript = GhostscriptScraper(expected.filename, True)
    ghostscript.scrape_file()

    # Ghostscript cannot handle version or MIME type
    unavailable = "(:unav)"
    expected.version = unavailable
    expected.streams[0]["version"] = unavailable
    expected.mimetype = unavailable
    expected.streams[0]["mimetype"] = unavailable

    evaluate_scraper(ghostscript, expected, eval_output=False)
def test_gzip_scraper(filename, result_dict, evaluate_scraper):
    """
    Run GzipWarctoolsScraper on gzip files.

    :filename: Test file name
    :result_dict: Result dict containing test purpose, and parts of
                  expected results of stdout and stderr
    :evaluate_scraper: Callable comparing the scraper against the expected
                       results; only called for well-formed files, with
                       "WarctoolsFullScraper" as the expected scraper class
    """
    expected = parse_results(filename, "application/warc", result_dict, True)
    gzip_scraper = GzipWarctoolsScraper(filename=expected.filename,
                                        mimetype="application/gzip")
    gzip_scraper.scrape_file()

    if expected.well_formed:
        evaluate_scraper(gzip_scraper, expected,
                         exp_scraper_cls="WarctoolsFullScraper")
        return

    # A not well-formed file with an unavailable version is expected to be
    # reported as plain gzip.
    if expected.streams[0]["version"] == UNAV:
        expected.update_mimetype("application/gzip")

    assert not gzip_scraper.well_formed
    assert not gzip_scraper.streams
    assert partial_message_included(expected.stdout_part,
                                    gzip_scraper.messages())
    assert partial_message_included(expected.stderr_part,
                                    gzip_scraper.errors())
Пример #18
0
def test_scraper(testpath, csv_text, result_dict, prefix, header,
                 evaluate_scraper, extra_params):
    """
    Write test data to a csv file and scrape it with CsvScraper.

    NB: Forcing unsupported MIME type causes an error to be logged, resulting
        in the file being reported as not well-formed regardless of its
        contents.

    :testpath: Directory where the test file is written
    :csv_text: Content written to the test file (opened in binary mode)
    :result_dict: Result dict; its first stream provides the expected MIME
                  type and version
    :prefix: Test file name without the ".csv" suffix
    :header: CSV header line, passed to the scraper as "fields"
    :evaluate_scraper: Callable comparing the scraper against the expected
                       results
    :extra_params: Extra parameters for the scraper; these override the
                   default parameters built here
    """
    csv_path = os.path.join(testpath, '%s.csv' % prefix)
    with open(csv_path, 'wb') as outfile:
        outfile.write(csv_text)

    # Read the expected MIME type and version before parsing the results.
    declared_stream = result_dict['streams'][0]
    mimetype = declared_stream['mimetype']
    version = declared_stream['version']

    # Split the path into the base path and the file name.
    path_parts = csv_path.rsplit('/', 1)
    expected = parse_results(path_parts[1], '', result_dict, True,
                             basepath=path_parts[0])
    expected.update_mimetype(mimetype)
    expected.update_version(version)
    if mimetype != 'text/csv':
        expected.well_formed = False

    first_stream = expected.streams[0]
    scraper_params = dict(
        separator=first_stream['separator'],
        delimiter=first_stream['delimiter'],
        fields=header,
    )
    scraper_params.update(extra_params)

    csv_scraper = CsvScraper(expected.filename, True, params=scraper_params)
    csv_scraper.scrape_file()

    evaluate_scraper(csv_scraper, expected)
Пример #19
0
def test_mediainfo_scraper_wav(filename, result_dict, evaluate_scraper):
    """
    Scrape WAV files with MediainfoScraper.

    :filename: Test file name
    :result_dict: Result dict containing the test purpose, parts of
                  expected results of stdout and stderr, and expected
                  streams
    :evaluate_scraper: Callable comparing the scraper against the expected
                       results; not called for files with "empty" in their
                       name
    """
    wav_mimetype = "audio/x-wav"
    expected = parse_results(filename, wav_mimetype, result_dict, False)
    # Files with "2" in their name are expected to have version "2".
    expected.streams[0]["version"] = "2" if "2" in filename else UNAP

    mediainfo = MediainfoScraper(filename=expected.filename,
                                 mimetype=wav_mimetype)
    mediainfo.scrape_file()

    if "empty" not in filename:
        evaluate_scraper(mediainfo, expected)
        return

    assert partial_message_included(expected.stdout_part,
                                    mediainfo.messages())
    assert partial_message_included(expected.stderr_part, mediainfo.errors())
    assert not mediainfo.streams
Пример #20
0
def test_mediainfo_scraper_mov(filename, result_dict, mimetype,
                               evaluate_scraper):
    """
    Scrape Quicktime and DV files with MediainfoScraper.

    :filename: Test file name
    :result_dict: Result dict containing the test purpose, parts of
                  expected results of stdout and stderr, and expected
                  streams
    :mimetype: File MIME type
    :evaluate_scraper: Callable comparing the scraper against the expected
                       results; not called for files with "empty" in their
                       name
    """
    expected = parse_results(filename, mimetype, result_dict, False)
    mediainfo = MediainfoScraper(filename=expected.filename,
                                 mimetype=mimetype)
    mediainfo.scrape_file()

    # File-specific adjustments of the first expected stream.
    first_stream = expected.streams[0]
    if filename == "valid__h264_aac_no_ftyp_atom.mov":
        first_stream["codec_name"] = "QuickTime"
    if ".dv" in filename:
        first_stream.pop("stream_type", None)

    if "empty" not in filename:
        evaluate_scraper(mediainfo, expected)
        return

    assert partial_message_included(expected.stdout_part,
                                    mediainfo.messages())
    assert partial_message_included(expected.stderr_part, mediainfo.errors())
    assert not mediainfo.streams
Пример #21
0
def test_gzip_scraper(filename, result_dict, evaluate_scraper):
    """
    Run GzipWarctoolsScraper on gzipped warc and arc files.

    :filename: Test file name; a name containing "warc" is handled as a
               warc file, anything else as an arc file
    :result_dict: Result dict handed to parse_results to build the
                  expected results
    :evaluate_scraper: Callable comparing the scraper against the expected
                       results; only called for well-formed files
    """
    is_warc = "warc" in filename
    mime = "application/warc" if is_warc else "application/x-internet-archive"
    classname = "WarcWarctoolsScraper" if is_warc else "ArcWarctoolsScraper"

    expected = parse_results(filename, mime, result_dict, True)
    gzip_scraper = GzipWarctoolsScraper(expected.filename, True,
                                        expected.params)
    gzip_scraper.scrape_file()

    if (expected.version == ""
            or expected.mimetype == "application/x-internet-archive"):
        expected.version = None
        expected.streams[0]["version"] = "(:unav)"

    if expected.well_formed:
        evaluate_scraper(gzip_scraper, expected, exp_scraper_cls=classname)
        return

    # A not well-formed file without a version is expected to be reported
    # as plain gzip.
    if expected.version is None:
        expected.mimetype = "application/gzip"
        expected.streams[0]["mimetype"] = "application/gzip"

    assert not gzip_scraper.well_formed
    assert not gzip_scraper.streams
    assert partial_message_included(expected.stdout_part,
                                    gzip_scraper.messages())
    assert partial_message_included(expected.stderr_part,
                                    gzip_scraper.errors())
Пример #22
0
def test_mediainfo_scraper_mkv(filename, result_dict, evaluate_scraper):
    """
    Scrape Matroska files with MediainfoScraper.

    :filename: Test file name
    :result_dict: Result dict containing the test purpose, parts of
                  expected results of stdout and stderr, and expected
                  streams
    :evaluate_scraper: Callable comparing the scraper against the expected
                       results; not called for files with "invalid" in
                       their name
    """
    mkv_mimetype = "video/x-matroska"
    expected = parse_results(filename, mkv_mimetype, result_dict, False)
    mediainfo = MediainfoScraper(filename=expected.filename,
                                 mimetype=mkv_mimetype)
    mediainfo.scrape_file()

    if "empty" in filename:
        expected.version = None
        first_stream = expected.streams[0]
        first_stream["version"] = None
        first_stream["stream_type"] = None

    if "invalid" not in filename:
        evaluate_scraper(mediainfo, expected)
        return

    assert partial_message_included(expected.stdout_part,
                                    mediainfo.messages())
    assert partial_message_included(expected.stderr_part, mediainfo.errors())
    assert not mediainfo.streams
Пример #23
0
def test_scraper_valid(filename, mimetype, charset, scraper_class,
                       evaluate_scraper):
    """
    Run the given scraper class on a valid file and evaluate the result.

    :filename: Test file name
    :mimetype: MIME type used for the expected results and the scraper
    :charset: Character encoding passed as the "charset" parameter
    :scraper_class: Scraper class to instantiate and run
    :evaluate_scraper: Callable comparing the scraper against the expected
                       results
    """
    expected = parse_results(
        filename, mimetype,
        {
            "purpose": "Test valid file.",
            "stdout_part": "successfully",
            "stderr_part": ""
        },
        True,
        {"charset": charset})

    # OpenDocument MIME types whose expected version is set to UNAV.
    opendocument_mimes = (
        "application/vnd.oasis.opendocument.text",
        "application/vnd.oasis.opendocument.spreadsheet",
        "application/vnd.oasis.opendocument.presentation",
        "application/vnd.oasis.opendocument.graphics",
        "application/vnd.oasis.opendocument.formula",
    )

    scraper = scraper_class(filename=expected.filename,
                            mimetype=mimetype,
                            params={"charset": charset})
    scraper.scrape_file()

    first_stream = expected.streams[0]
    if first_stream["mimetype"] == "application/xhtml+xml":
        first_stream["stream_type"] = "text"
    if (first_stream["mimetype"] in opendocument_mimes
            or HtmlFileMagicMeta.is_supported(first_stream["mimetype"])):
        first_stream["version"] = UNAV

    evaluate_scraper(scraper, expected)
def test_no_parameters(filename, evaluate_scraper):
    """
    Scrape a csv file with CsvScraper without separate parameters.

    :filename: Test file name
    :evaluate_scraper: Callable comparing the scraper against the expected
                       results
    """
    expected_stream = {
        "stream_type": "text",
        "index": 0,
        "mimetype": MIMETYPE,
        "version": UNAP,
        "delimiter": ",",
        "separator": "\r\n",
        "quotechar": "\"",
        "first_line": ["year", "brand", "model", "detail", "other"]
    }
    expected = parse_results(
        filename, MIMETYPE,
        {
            "purpose": "Test valid file on default settings.",
            "stdout_part": "successfully",
            "stderr_part": "",
            "streams": {0: expected_stream}
        },
        True)

    # Only the MIME type is given; no params are passed to the scraper.
    csv_scraper = CsvScraper(expected.filename, mimetype="text/csv")
    csv_scraper.scrape_file()

    evaluate_scraper(csv_scraper, expected)
Пример #25
0
def test_mediainfo_scraper_avi(filename, result_dict):
    """
    Scrape AVI files with MediainfoScraper.

    Both Mediainfo and FFMpeg cannot be used for metadata scraping, and FFMpeg
    meets our needs better with AVI, so MediainfoScraper should just return
    one stream full of unavs to be overwritten by results from FFMpeg.

    :filename: Test file name
    :result_dict: Result dict handed to parse_results to build the
                  expected results
    """
    avi_mimetype = "video/avi"
    expected = parse_results(filename, avi_mimetype, result_dict, True)

    mediainfo = MediainfoScraper(expected.filename,
                                 True,
                                 params={"mimetype_guess": avi_mimetype})
    mediainfo.scrape_file()

    assert partial_message_included(expected.stdout_part,
                                    mediainfo.messages())
    assert partial_message_included(expected.stderr_part, mediainfo.errors())

    if "invalid" in filename:
        assert not mediainfo.streams
        return

    # Exactly one stream, where everything except the index is "(:unav)".
    assert len(mediainfo.streams) == 1
    for method in mediainfo.streams[0].iterate_metadata_methods():
        expected_value = 0 if method.__name__ == "index" else "(:unav)"
        assert method() == expected_value
Пример #26
0
def test_scraper_valid(filename, result_dict, params, evaluate_scraper):
    """
    Scrape a valid XML file with Xmllint and evaluate the result.

    :filename: Test file name
    :result_dict: Result dict handed to parse_results to build the
                  expected results
    :params: Extra parameters handed to parse_results
    :evaluate_scraper: Callable comparing the scraper against the expected
                       results
    """
    expected = parse_results(filename, 'text/xml', result_dict, True, params)

    xmllint = Xmllint(expected.filename, expected.mimetype, True,
                      expected.params)
    xmllint.scrape_file()

    evaluate_scraper(xmllint, expected)
    # '<note>' must not be among the scraper messages.
    assert '<note>' not in xmllint.messages()
Пример #27
0
def test_scraper_tiff(filename, result_dict, evaluate_scraper):
    """
    Scrape a tiff file with JHoveTiffScraper and evaluate the result.

    :filename: Test file name
    :result_dict: Result dict handed to parse_results to build the
                  expected results
    :evaluate_scraper: Callable comparing the scraper against the expected
                       results
    """
    expected = parse_results(filename, "image/tiff", result_dict, True)

    jhove = JHoveTiffScraper(expected.filename, True, expected.params)
    jhove.scrape_file()

    # The expected version is always "6.0".
    tiff_version = "6.0"
    expected.version = tiff_version
    expected.streams[0]["version"] = tiff_version

    evaluate_scraper(jhove, expected)
Пример #28
0
def test_scraper_jpeg(filename, result_dict, evaluate_scraper):
    """
    Scrape a jpeg file with JHoveJpegScraper and evaluate the result.

    :filename: Test file name
    :result_dict: Result dict handed to parse_results to build the
                  expected results
    :evaluate_scraper: Callable comparing the scraper against the expected
                       results
    """
    expected = parse_results(filename, "image/jpeg", result_dict, True)

    jhove = JHoveJpegScraper(expected.filename, True, expected.params)
    jhove.scrape_file()

    # The expected version is always "(:unav)".
    unavailable = "(:unav)"
    expected.version = unavailable
    expected.streams[0]["version"] = unavailable

    evaluate_scraper(jhove, expected)
Пример #29
0
def test_scraper(filename, result_dict, evaluate_scraper):
    """
    Scrape a file with PsppScraper and evaluate the result.

    :filename: Test file name
    :result_dict: Result dict handed to parse_results to build the
                  expected results
    :evaluate_scraper: Callable comparing the scraper against the expected
                       results
    """
    expected = parse_results(filename, MIMETYPE, result_dict, True)

    pspp = PsppScraper(expected.filename, True, expected.params)
    pspp.scrape_file()

    # The MIME type of the first stream is expected to be "(:unav)".
    first_stream = expected.streams[0]
    first_stream["mimetype"] = "(:unav)"

    evaluate_scraper(pspp, expected)
Пример #30
0
def test_scraper(filename, result_dict, evaluate_scraper):
    """
    Scrape a file with Vnu and evaluate the result.

    :filename: Test file name
    :result_dict: Result dict handed to parse_results to build the
                  expected results
    :evaluate_scraper: Callable comparing the scraper against the expected
                       results
    """
    expected = parse_results(filename, MIMETYPE, result_dict, True)

    vnu = Vnu(expected.filename, expected.mimetype, True, expected.params)
    vnu.scrape_file()

    # The expected version is always '5.0'.
    html_version = '5.0'
    expected.version = html_version
    expected.streams[0]['version'] = html_version

    evaluate_scraper(vnu, expected)