Exemplo n.º 1
0
def test_stdout_logging_and_csv_module_fail(capsys):
    """Verify that a dialect-detection failure is reported on stdout.

    Runs ``investigate_encoding_and_dialect`` against the ``unparsable.csv``
    fixture with a stdout-enabled ``UI`` and ``os._exit`` (as referenced from
    ``datarobot_batch_scoring.utils``) patched out. The call is expected to
    raise ``csv.Error``, request exit code 1 through the patched ``_exit``,
    and leave the error text plus the assistance hint in captured stdout.
    """
    fixture_path = 'tests/fixtures/unparsable.csv'
    error_text = (
        "[ERROR] The csv module failed to detect the CSV "
        "dialect. Try giving hints with the --delimiter "
        "argument, E.g  --delimiter=','"
    )
    expected_output = (
        '{}\nIf you need assistance please send the output of this '
        'script to [email protected].'
    ).format(error_text)

    with UI(None, logging.DEBUG, stdout=True) as ui:
        exit_target = 'datarobot_batch_scoring.utils.os._exit'
        with mock.patch(exit_target) as fake_exit:
            # With _exit mocked the process keeps running, so the underlying
            # csv.Error surfaces to the test.
            with pytest.raises(csv.Error):
                investigate_encoding_and_dialect(fixture_path, None, ui)
            fake_exit.assert_called_with(1)
        captured_out, _ = capsys.readouterr()
        assert expected_output in captured_out.strip('\n')
Exemplo n.º 2
0
def test_investigate_encoding_and_dialect():
    """Check the encoding and CSV dialect found for ``windows_encoded.csv``.

    The returned encoding must be ``iso-8859-2`` and the dialect registered
    under the name ``dataset_dialect`` must use CRLF line endings, a double
    quote as quote character and a comma as delimiter.
    """
    fixture_path = 'tests/fixtures/windows_encoded.csv'
    with UI(None, logging.DEBUG, stdout=False) as ui:
        detected_encoding = investigate_encoding_and_dialect(
            fixture_path, None, ui)
        registered = csv.get_dialect('dataset_dialect')
        assert detected_encoding == 'iso-8859-2'
        observed = (registered.lineterminator,
                    registered.quotechar,
                    registered.delimiter)
        assert observed == ('\r\n', '"', ',')
Exemplo n.º 3
0
def test_investigate_encoding_and_dialect_submit_encoding():
    """Check that an explicitly supplied encoding bypasses chardet.

    ``chardet.detect`` (as referenced from ``datarobot_batch_scoring.utils``)
    is patched; after calling with ``encoding='iso-8859-2'`` the same encoding
    must be returned and the patched detector must not have been called.
    """
    fixture_path = 'tests/fixtures/windows_encoded.csv'
    call_options = {
        'fast': False,
        'encoding': 'iso-8859-2',
        'skip_dialect': False,
    }
    with UI(None, logging.DEBUG, stdout=False) as ui:
        detect_target = 'datarobot_batch_scoring.utils.chardet.detect'
        with mock.patch(detect_target) as fake_detect:
            returned_encoding = investigate_encoding_and_dialect(
                fixture_path, None, ui, **call_options)
        assert returned_encoding == 'iso-8859-2'
        assert not fake_detect.called
Exemplo n.º 4
0
def test_investigate_encoding_and_dialect_substitute_delimiter():
    """Check that a caller-supplied delimiter ends up in the dialect.

    With ``skip_dialect=True`` and a ``'|'`` delimiter, the patched
    ``csv.Sniffer`` must not be called and the dialect registered as
    ``dataset_dialect`` must report ``'|'`` as its delimiter.
    """
    fixture_path = 'tests/fixtures/windows_encoded.csv'
    with UI(None, logging.DEBUG, stdout=False) as ui:
        sniffer_target = 'datarobot_batch_scoring.utils.csv.Sniffer'
        with mock.patch(sniffer_target) as fake_sniffer:
            returned_encoding = investigate_encoding_and_dialect(
                fixture_path, '|', ui,
                fast=False, encoding='utf-8', skip_dialect=True)
        # 'utf-8' is intentionally the wrong encoding for this fixture; the
        # test only expects the supplied value to be handed back unchanged.
        assert returned_encoding == 'utf-8'
        assert not fake_sniffer.called
        registered = csv.get_dialect('dataset_dialect')
        assert registered.delimiter == '|'
Exemplo n.º 5
0
def test_investigate_encoding_and_dialect_skip_dialect():
    """Check behaviour when dialect sniffing is skipped and no hints given.

    Called with an empty ``encoding`` string, no delimiter and
    ``skip_dialect=True``: the returned encoding must be ``iso-8859-2``,
    the patched ``csv.Sniffer`` must not be called, and the registered
    ``dataset_dialect`` must use a comma delimiter.
    """
    fixture_path = 'tests/fixtures/windows_encoded.csv'
    call_options = {'fast': False, 'encoding': '', 'skip_dialect': True}
    with UI(None, logging.DEBUG, stdout=False) as ui:
        sniffer_target = 'datarobot_batch_scoring.utils.csv.Sniffer'
        with mock.patch(sniffer_target) as fake_sniffer:
            returned_encoding = investigate_encoding_and_dialect(
                fixture_path, None, ui, **call_options)
        assert returned_encoding == 'iso-8859-2'
        assert not fake_sniffer.called
        registered = csv.get_dialect('dataset_dialect')
        assert registered.delimiter == ','
Exemplo n.º 6
0
def test_auto_small_dataset():
    """Check that ``auto_sampler`` returns 500 for ``regression_jp.csv.gz``.

    The encoding passed to ``auto_sampler`` is the one returned by
    ``investigate_encoding_and_dialect`` for the same file.
    """
    fixture_path = 'tests/fixtures/regression_jp.csv.gz'
    with UI(None, logging.DEBUG, stdout=False) as ui:
        detected_encoding = investigate_encoding_and_dialect(
            fixture_path, None, ui)
        sample_size = auto_sampler(fixture_path, detected_encoding, ui)
        assert sample_size == 500
Exemplo n.º 7
0
def test_auto_sample():
    """Check that ``auto_sampler`` returns 14980 for ``criteo_top30_1m.csv.gz``.

    The encoding passed to ``auto_sampler`` is the one returned by
    ``investigate_encoding_and_dialect`` for the same file.
    """
    fixture_path = 'tests/fixtures/criteo_top30_1m.csv.gz'
    with UI(None, logging.DEBUG, stdout=False) as ui:
        detected_encoding = investigate_encoding_and_dialect(
            fixture_path, None, ui)
        sample_size = auto_sampler(fixture_path, detected_encoding, ui)
        assert sample_size == 14980
        # NOTE(review): explicit close while still inside the ``with`` block;
        # confirm whether UI's context manager already handles this.
        ui.close()