def test_empty_file():
    """
    Test an empty file, and that the sniffer is not used if the delimiter
    and separator are given.

    First we test that the sniffer raises an exception for an empty file
    when the parameters are not given. Secondly, the sniffer is skipped
    when the parameters are given, but the scraper then raises an
    exception elsewhere.
    """
    scraper = CsvScraper("tests/data/text_csv/invalid__empty.csv",
                         mimetype=MIMETYPE)
    scraper.scrape_file()
    assert partial_message_included("Could not determine delimiter",
                                    scraper.errors())
    assert not scraper.well_formed

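    # With the delimiter and separator given, the sniffer is skipped; the
    # scraper then fails elsewhere while reading the empty file as CSV.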
    scraper = CsvScraper("tests/data/text_csv/invalid__empty.csv",
                         mimetype=MIMETYPE,
                         params={
                             "delimiter": ";",
                             "separator": "CRLF"
                         })
    scraper.scrape_file()
    assert partial_message_included("Error reading file as CSV",
                                    scraper.errors())
    assert not scraper.well_formed


def test_no_parameters(filename, evaluate_scraper):
    """
    Test scraper without separate parameters.

    :filename: Test file name
    """
    correct = parse_results(
        filename, MIMETYPE, {
            "purpose": "Test valid file on default settings.",
            "stdout_part": "successfully",
            "stderr_part": "",
            "streams": {
                0: {
                    "stream_type": "text",
                    "index": 0,
                    "mimetype": MIMETYPE,
                    "version": UNAP,
                    "delimiter": ",",
                    "separator": "\r\n",
                    "quotechar": "\"",
                    "first_line":
                    ["year", "brand", "model", "detail", "other"]
                }
            }
        }, True)
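    # No delimiter or separator parameters are given, so the scraper relies
    # on the sniffer to detect them from the file.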
    scraper = CsvScraper(correct.filename, mimetype="text/csv")
    scraper.scrape_file()
    evaluate_scraper(scraper, correct)


def test_scraper(filename, result_dict, header, extra_params,
                 evaluate_scraper):
    """
    Run CSV scraping for the given test file.

    :filename: Test file name
    :result_dict: Result dict containing purpose of the test, parts of
                  expected stdout and stderr, and expected streams
    :header: CSV header line
    :extra_params: Extra parameters for the scraper (e.g. charset)
    """
    correct = parse_results(filename, "text/csv", result_dict, True)
    params = {
        "separator": correct.streams[0]["separator"],
        "delimiter": correct.streams[0]["delimiter"],
        "fields": header,
        "mimetype": MIMETYPE
    }
    params.update(extra_params)
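    # The separator, delimiter and header fields are passed explicitly, so
    # the sniffer is not used.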
    scraper = CsvScraper(filename=correct.filename,
                         mimetype=MIMETYPE,
                         params=params)
    scraper.scrape_file()

    evaluate_scraper(scraper, correct)


def test_is_supported():
    """Test is_supported method."""
    mime = MIMETYPE
    ver = ""
    assert CsvScraper.is_supported(mime, ver, True)
    assert CsvScraper.is_supported(mime, None, True)
    assert CsvScraper.is_supported(mime, ver, False)
    assert CsvScraper.is_supported(mime, "foo", True)
    assert not CsvScraper.is_supported("foo", ver, True)


def test_nonexistent_file():
    """
    Test that CsvScraper logs an error when the file is not found.
    """
    scraper = CsvScraper(filename="nonexistent/file.csv", mimetype="text/csv")
    scraper.scrape_file()
    assert partial_message_included("Error when reading the file: ",
                                    scraper.errors())
    assert not scraper.well_formed


def test_pdf_as_csv():
    """Test CSV scraper with PDF files."""
    scraper = CsvScraper(filename=PDF_PATH, mimetype="text/csv")
    scraper.scrape_file()

    assert not scraper.well_formed, scraper.messages() + scraper.errors()
    assert not partial_message_included("successfully", scraper.messages())
    assert scraper.errors()


def test_first_line_charset(filename, charset):
    """
    Test that the CSV scraper handles the first line encoding correctly.

    :filename: Test file name
    :charset: Character encoding
    """
    params = {
        "delimiter": ",",
        "separator": "CR+LF",
        "mimetype": "text/csv",
        "charset": charset
    }

    scraper = CsvScraper(filename, mimetype="text/csv", params=params)
    scraper.scrape_file()
    assert scraper.well_formed
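    # The expected first line contains non-ASCII characters, so decoding it
    # correctly depends on the given charset.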
    assert scraper.streams[0].first_line() == \
        ["year", "bränd", "mödel", "detail", "other"]


def test_large_field(filename, result_dict, header, extra_params, size,
                     evaluate_scraper, testpath):
    """
    Test that large field sizes are properly handled.
    Large test files are created on the fly so as not to take up space.

    :filename: Test file name
    :result_dict: Result dict containing purpose of the test, parts of
                  expected stdout and stderr, and expected streams
    :header: CSV header line
    :extra_params: Extra parameters for the scraper (e.g. charset)
    :size: Number of bytes in the large field
    """
    tempdatapath = os.path.join(testpath, "text_csv")
    os.makedirs(tempdatapath)
    tempfilepath = os.path.join(tempdatapath, filename)
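    # Write a small CSV on the fly whose last field is `size` bytes long.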
    with io.open(tempfilepath, 'w', encoding='utf8') as tempfile:
        tempfile.write("test1,test2\ntest3,")
        tempfile.write(size * "a")

    correct = parse_results(filename,
                            "text/csv",
                            result_dict,
                            True,
                            basepath=testpath)
    params = {
        "separator": correct.streams[0]["separator"],
        "delimiter": correct.streams[0]["delimiter"],
        "fields": header,
        "mimetype": "text/csv"
    }
    params.update(extra_params)
    scraper = CsvScraper(filename=correct.filename,
                         mimetype=MIMETYPE,
                         params=params)
    scraper.scrape_file()

    evaluate_scraper(scraper, correct)