def test_grade(file_path, expected_grade):
    """Check the digital preservation grade reported by the scraper.

    The grade is queried twice: once on a freshly created scraper and
    once after the file has been scraped.

    :file_path: File given to the scraper
    :expected_grade: Grade the scraper should report after scraping
    """
    scraper = Scraper(file_path)

    # Nothing has been scraped yet, so the grade must be unavailable
    grade_before_scraping = scraper.grade()
    assert grade_before_scraping == "(:unav)"

    # Scraping makes the actual grade available
    scraper.scrape()
    grade_after_scraping = scraper.grade()
    assert grade_after_scraping == expected_grade
def test_grading(fullname, mimetype, version):
    """Check the grade given to a valid test file.

    A file is expected to be graded as recommended unless it appears in
    one of the explicit lists (unacceptable, bit-level, bit-level with
    recommended, acceptable). The lists are consulted in that order and
    the first match decides the expected grade.

    Files listed in UNAV_VERSION are skipped.

    :fullname: Test file name given to the scraper
    :mimetype: MIME type given to the scraper
    :version: File format version given to the scraper
    """
    if fullname in UNAV_VERSION:
        skip_reason = "File format version of file {} can not be defined."
        pytest.skip(skip_reason.format(fullname))

    # Charset is None unless one has been given for this particular file
    scraper = Scraper(fullname,
                      mimetype=mimetype,
                      version=version,
                      charset=GIVEN_CHARSETS.get(fullname))
    scraper.scrape()

    # Ordered by precedence: the first list containing the file wins
    listed_grades = (
        (UNACCEPTABLE_FILES, UNACCEPTABLE),
        (BIT_LEVEL_FILES, BIT_LEVEL),
        (BIT_LEVEL_WITH_RECOMMENDED_FILES, BIT_LEVEL_WITH_RECOMMENDED),
        (ACCEPTABLE_FILES, ACCEPTABLE),
    )
    for listed_files, listed_grade in listed_grades:
        if fullname in listed_files:
            expected_grade = listed_grade
            break
    else:
        # Not listed anywhere, so the default grade applies
        expected_grade = RECOMMENDED

    assert scraper.grade() == expected_grade
# Example no. 3
# 0
def scrape_file(ctx, filename, check_wellformed, tool_info, mimetype, version):
    """
    Identify file type, collect metadata, and optionally check well-formedness.

    In addition to the given options, the user can provide any extra options
    that are passed onto the scraper. These options must be in the long form,
    e.g. "--charset=UTF-8" or "--charset UTF-8".
    \f

    The results are printed as a JSON document indented by four spaces.

    :ctx: Context object; its ``args`` are converted into extra keyword
          arguments for the scraper
    :filename: Path to the file that should be scraped
    :check_wellformed: Flag whether the scraper checks wellformedness; when
                       set, the result also contains the "well-formed" key
    :tool_info: Flag whether the result includes the messages from different
                3rd party tools under the "tool_info" key
    :mimetype: Specified mimetype for the scraped file
    :version: Specified version for the scraped file
    :raises: click.ClickException if any tool entry reports a class
             containing "ScraperNotFound"
    """
    extra_options = _extra_options_to_dict(ctx.args)
    scraper = Scraper(filename,
                      mimetype=mimetype,
                      version=version,
                      **extra_options)
    scraper.scrape(check_wellformed=check_wellformed)

    # Keys are added in the order they should appear in the output
    results = {}
    results["path"] = ensure_text(scraper.filename)
    results["MIME type"] = ensure_text(scraper.mimetype)
    results["version"] = ensure_text(scraper.version)
    results["metadata"] = scraper.streams
    results["grade"] = scraper.grade()
    if check_wellformed:
        results["well-formed"] = scraper.well_formed
    if tool_info:
        results["tool_info"] = scraper.info

    # Collect the errors of each tool, keyed by the class that reported them
    errors = {}
    for tool_entry in scraper.info.values():
        tool_class = tool_entry["class"]
        if "ScraperNotFound" in tool_class:
            raise click.ClickException(
                "Proper scraper was not found. The file was not analyzed.")

        tool_errors = tool_entry["errors"]
        if tool_errors:
            errors[tool_class] = tool_errors

    # The "errors" key is left out entirely when no tool reported any
    if errors:
        results["errors"] = errors

    output = json.dumps(results, indent=4)
    click.echo(output)