示例#1
0
def test_source_csv_dialect(tmpdir, table):
    """Check the CSV dialect reported by ``Source.csv_dialect``.

    *table* is written as comma-, tab- and colon-delimited files and as a
    JSON file. The comma and tab files are opened with no hints and must
    report their delimiter and a double-quote quote character. The
    colon-delimited file is opened with an explicit format, delimiter and
    quote character, which must be reported back unchanged. The JSON file
    must report no CSV dialect at all.
    """
    comma_file = str(tmpdir.join('comma.csv'))
    tab_file = str(tmpdir.join('tab.csv'))
    passwd_file = str(tmpdir.join('weird.csv'))
    json_file = str(tmpdir.join('data.json'))

    # The csv module requires files given to a writer to be opened with
    # newline=''; otherwise the writer's '\r\n' row terminators are
    # translated a second time wherever os.linesep is not '\n'.
    for path, delimiter in (
        (comma_file, ','),
        (tab_file, '\t'),
        (passwd_file, ':'),
    ):
        with open(path, 'w', encoding='utf-8', newline='') as f:
            csv.writer(f, delimiter=delimiter).writerows(table)
    with open(json_file, 'w', encoding='utf-8') as f:
        json.dump(table, f)

    with open(comma_file, 'rb') as f:
        s = Source(f)
        assert s.csv_dialect.delimiter == ','
        assert s.csv_dialect.quotechar == '"'
    with open(tab_file, 'rb') as f:
        s = Source(f)
        assert s.csv_dialect.delimiter == '\t'
        assert s.csv_dialect.quotechar == '"'
    with open(passwd_file, 'rb') as f:
        # Explicit settings are expected to be reflected as-is.
        s = Source(f, format='csv', csv_delimiter=':', csv_quotechar="'")
        assert s.csv_dialect.delimiter == ':'
        assert s.csv_dialect.quotechar == "'"
    with open(json_file, 'rb') as f:
        assert Source(f).csv_dialect is None
示例#2
0
def test_source_sample_limit(tmpdir):
    """Check that sampling stops at ``sample_limit`` and is repeatable.

    A 2000-byte file is sampled with a limit of 1000 bytes. The sample must
    be exactly the first 1000 bytes and the stream position must sit at
    1000, both on the first request and when the sample is requested again.
    """
    limit = 1000
    path = str(tmpdir.join('data.file'))
    with open(path, 'wb') as out:
        out.write(b'\xff' * 2000)
    expected = b'\xff' * limit
    with open(path, 'rb') as stream:
        source = Source(stream, sample_limit=limit)
        # The second pass checks that asking again yields the same sample
        # and leaves the stream position where it was.
        for _ in range(2):
            assert source._sample_bytes() == expected
            assert stream.tell() == limit
示例#3
0
def test_source_format(tmpdir, table):
    """Check format detection and the effect of forcing the wrong format.

    *table* is written as CSV. ``Source.format`` must report ``'csv'`` for
    it, and reading ``data`` from the same stream with ``format='json'``
    forced must raise :exc:`ValueError`.
    """
    filename = str(tmpdir.join('data.csv'))
    # newline='' is required by the csv module for writer files; without it
    # the '\r\n' row terminators get translated again on some platforms.
    with open(filename, 'w', encoding='utf-8', newline='') as f:
        csv.writer(f).writerows(table)
    with open(filename, 'rb') as f:
        assert Source(f).format == 'csv'
        # Rewind before handing the same stream to a second Source.
        f.seek(0)
        with pytest.raises(ValueError):
            Source(f, format='json').data
示例#4
0
def test_source_bad_data(tmpdir):
    """Check that loading a file of only blank lines raises ValueError.

    The file contains 100 newline characters and nothing else; accessing
    ``Source.data`` on it is expected to fail with :exc:`ValueError`.
    """
    path = str(tmpdir.join('data.yaml'))
    blank_lines = '\n' * 100
    with open(path, 'w') as out:
        out.write(blank_lines)
    with open(path, 'rb') as stream, pytest.raises(ValueError):
        Source(stream).data
示例#5
0
def test_source_detect_xml(tmpdir):
    """Check XML detection for well-formed and irregular documents.

    A document that starts with an XML declaration must be detected as
    ``'xml'``. A document with a newline before the declaration, and a
    document with no declaration at all, must also be detected as ``'xml'``
    but are expected to emit a ``ValidationWarning``.
    """
    path = str(tmpdir.join('data.xml'))

    def rewrite(content):
        # Replace the file's contents with *content*.
        with open(path, 'w') as out:
            out.write(content)

    rewrite('<?xml version="1.0" encoding="utf-8" ?><doc></doc>')
    with open(path, 'rb') as stream:
        assert Source(stream).format == 'xml'

    irregular_documents = (
        # Declaration preceded by a newline.
        '\n<?xml version="1.0" encoding="utf-8" ?><doc></doc>',
        # No declaration at all.
        '<doc><header></header><footer></footer></doc>',
    )
    for content in irregular_documents:
        rewrite(content)
        with open(path, 'rb') as stream:
            with pytest.warns(ValidationWarning):
                assert Source(stream).format == 'xml'
示例#6
0
def test_source_yaml_data(tmpdir, table):
    """Check that a YAML dump of *table* is loaded back unchanged.

    ``Source.data`` is read twice to confirm that a repeated query returns
    the same result.
    """
    path = str(tmpdir.join('data.yaml'))
    with open(path, 'w', encoding='utf-8') as out:
        yaml.dump(table, out)
    with open(path, 'rb') as stream:
        source = Source(stream)
        # Two reads: the second one checks repeat-query idempotency.
        for _ in range(2):
            assert source.data == table
示例#7
0
def test_source_encoding(tmpdir, table):
    """Check encoding detection and the effect of forcing a wrong encoding.

    A Latin-1 encoded CSV file must be reported as ``iso-8859-1`` (compared
    case-insensitively) with a ``ValidationWarning``, and reading its data
    with ``encoding='utf-8'`` forced must raise :exc:`UnicodeError`. A UTF-8
    encoded CSV file must be reported as ``utf-8``.
    """
    filename = str(tmpdir.join('latin-1.csv'))
    # newline='' is required by the csv module for writer files; without it
    # the '\r\n' row terminators get translated again on some platforms.
    with open(filename, 'w', encoding='latin-1', newline='') as f:
        # NOTE(review): the last three rows are skipped, presumably because
        # they cannot be encoded as Latin-1 -- confirm against the fixture.
        csv.writer(f).writerows(table[:-3])
    with open(filename, 'rb') as f:
        with pytest.warns(ValidationWarning):
            assert Source(f).encoding.lower() == 'iso-8859-1'
        # Rewind before handing the same stream to a second Source.
        f.seek(0)
        with pytest.raises(UnicodeError):
            Source(f, encoding='utf-8').data

    filename = str(tmpdir.join('utf-8.csv'))
    with open(filename, 'w', encoding='utf-8', newline='') as f:
        csv.writer(f).writerows(table)
    with open(filename, 'rb') as f:
        assert Source(f).encoding.lower() == 'utf-8'
示例#8
0
def test_source_detect_yaml(tmpdir):
    """Check that a small YAML mapping is detected as ``'yaml'``."""
    document = """\
structa:
  language: Python
  versions: 3.5, 3.6, 3.7, 3.8
  os: all
"""
    path = str(tmpdir.join('data.yaml'))
    with open(path, 'w') as out:
        out.write(document)
    with open(path, 'rb') as stream:
        source = Source(stream)
        assert source.format == 'yaml'
示例#9
0
def test_source_csv_data(tmpdir, table):
    """Check the rows loaded from a CSV dump of *table*.

    ``Source.data`` must equal every row of *table* after the first
    (presumably the first row is consumed as a header -- confirm against
    ``Source``). The data is read twice to confirm that a repeated query
    returns the same result.
    """
    data_file = str(tmpdir.join('data.csv'))
    # newline='' is required by the csv module for writer files; without it
    # the '\r\n' row terminators get translated again on some platforms.
    with open(data_file, 'w', encoding='utf-8', newline='') as f:
        csv.writer(f).writerows(table)
    with open(data_file, 'rb') as f:
        s = Source(f)
        assert s.data == table[1:]
        # Check repeat-query idempotency
        assert s.data == table[1:]
示例#10
0
def test_source_detect_csv(tmpdir, table):
    """Check that hand-written, fully quoted CSV is detected as ``'csv'``.

    The file is built without the csv module: every value is wrapped in
    double quotes with embedded double quotes doubled, fields are joined
    with commas and rows with CRLF, and there is no terminator after the
    last row.
    """
    filename = str(tmpdir.join('data.csv'))
    # newline='' keeps text-mode newline translation from rewriting the
    # explicit '\r\n' separators (into '\r\r\n' where os.linesep is '\r\n'),
    # so the bytes on disk are the same on every platform.
    with open(filename, 'w', encoding='utf-8', newline='') as f:
        f.write('\r\n'.join(
            ','.join(
                '"{value}"'.format(value=value.replace('"', '""'))
                for value in row
            )
            for row in table
        ))
    with open(filename, 'rb') as f:
        assert Source(f).format == 'csv'
示例#11
0
def test_source_detect_yaml_missing(tmpdir):
    """Check that loading YAML raises ImportError when yaml is unavailable.

    ``structa.source.yaml`` is patched to ``None`` for the duration of the
    test; accessing ``Source.data`` on a YAML document is then expected to
    fail with :exc:`ImportError`.
    """
    document = """\
structa:
  language: Python
  versions: 3.5, 3.6, 3.7, 3.8
  os: all
"""
    path = str(tmpdir.join('data.yaml'))
    with mock.patch('structa.source.yaml', None):
        with open(path, 'w') as out:
            out.write(document)
        with open(path, 'rb') as stream, pytest.raises(ImportError):
            Source(stream).data
示例#12
0
def test_source_unknown(tmpdir):
    """Check that a file of only blank lines has format ``'unknown'``."""
    path = str(tmpdir.join('data.yaml'))
    blank_lines = '\n' * 100
    with open(path, 'w') as out:
        out.write(blank_lines)
    with open(path, 'rb') as stream:
        source = Source(stream)
        assert source.format == 'unknown'