Пример #1
0
def test_read_bom(num, prefix=""):
	"""Verify how the gzutil line readers treat UTF-8 BOMs in TMP_FN.

	The assertions expect TMP_FN to hold two lines, "a" and "b", each
	preceded by a UTF-8 BOM, with `prefix` in front of the first line.
	With strip_bom=True only the BOM directly before the "a" is expected
	to disappear; the one on the second line must always survive.

	num is only used to identify the calling test case in the assertion
	output.
	"""
	def read_lines(reader, *args, **kwargs):
		# Collect every line the given reader class yields from TMP_FN.
		with reader(TMP_FN, *args, **kwargs) as fh:
			return list(fh)

	# Raw bytes: a BOM shows up as its three UTF-8 bytes.
	data = read_lines(gzutil.GzBytesLines)
	assert data == [prefix.encode("utf-8") + b"\xef\xbb\xbfa", b"\xef\xbb\xbfb"], (num, data)
	data = read_lines(gzutil.GzBytesLines, strip_bom=True)
	assert data == [prefix.encode("utf-8") + b"a", b"\xef\xbb\xbfb"], (num, data)

	# Default decoding: a BOM shows up as U+FEFF.
	data = read_lines(gzutil.GzUnicodeLines)
	assert data == [prefix + "\ufeffa", "\ufeffb"], (num, data)
	data = read_lines(gzutil.GzUnicodeLines, strip_bom=True)
	assert data == [prefix + "a", "\ufeffb"], (num, data)

	# latin-1 decoding: each BOM byte becomes its own character.
	data = read_lines(gzutil.GzUnicodeLines, "latin-1")
	assert data == [prefix.encode("utf-8").decode("latin-1") + u"\xef\xbb\xbfa", u"\xef\xbb\xbfb"], (num, data)
	data = read_lines(gzutil.GzUnicodeLines, "latin-1", strip_bom=True)
	assert data == [prefix.encode("utf-8").decode("latin-1") + u"a", u"\xef\xbb\xbfb"], (num, data)

	# ascii with "ignore": every non-ascii byte (BOMs and prefix) is dropped.
	data = read_lines(gzutil.GzUnicodeLines, "ascii", "ignore")
	assert data == ["a", "b"], (num, data)

	if version_info[0] <= 2:
		return
	# On python3 the ascii reader must refuse the BOM bytes outright.
	with gzutil.GzAsciiLines(TMP_FN) as fh:
		try:
			next(fh)
		except ValueError:
			pass
		else:
			raise Exception("GzAsciiLines allowed non-ascii in python3")
Пример #2
0
	def __init__(self, *a, **kw):
		"""Open a gzutil line reader on self.fh, forwarding all arguments.

		A 'max_count' keyword, when given, is raised by one before being
		passed on (NOTE(review): presumably to allow for one extra line
		consumed by this wrapper -- confirm against the rest of the class).
		The reader yields unicode lines when PY3 is true, bytes lines
		otherwise.
		"""
		if 'max_count' in kw:
			kw['max_count'] += 1
		reader_cls = gzutil.GzUnicodeLines if PY3 else gzutil.GzBytesLines
		self.fh = reader_cls(*a, **kw)
Пример #3
0
def prepare(SOURCE_DIRECTORY):
    """Locate the input file, work out the column labels and create the writer.

    The input is options.filename relative to SOURCE_DIRECTORY. If the name
    ends in ".zip" (case-insensitive) the archive must contain exactly one
    member, which is extracted to a file named 'extracted' in the current
    working directory and read from there instead.

    Labels are taken from options.labels when set; otherwise from the first
    line of the file (when options.labelsonfirstline is set), split on
    options.separator with optional quote handling (options.quote_support).
    They are then renamed through options.rename.

    Returns:
        The tuple (separator, filename, orig_filename, labels, dw), where
        filename is the file to actually read (possibly 'extracted'),
        orig_filename is the path as given, and dw is a meta-only
        DatasetWriter with one 'bytes' column per label.

    Raises:
        AssertionError: if the separator is not exactly one character, the
            ZIP file does not have exactly one member, no labels could be
            determined, or a label is empty or duplicated.
    """
    separator = options.separator
    assert len(separator) == 1
    filename = os.path.join(SOURCE_DIRECTORY, options.filename)
    orig_filename = filename

    if filename.lower().endswith('.zip'):
        from shutil import copyfileobj
        from zipfile import ZipFile
        filename = 'extracted'
        with ZipFile(orig_filename, 'r') as z:
            infos = z.infolist()
            assert len(
                infos
            ) == 1, 'There is only support for ZIP files with exactly one member.'
            # Wouldn't it be nice if ZipFile.extract let me choose the filename?
            # Both handles are closed on exit, including the archive member.
            with open(filename, 'wb') as ofh, z.open(infos[0]) as zfh:
                copyfileobj(zfh, ofh, 1024 * 1024)

    # Stays None unless labels are read from the file, so that a missing
    # options.labels reaches the "No labels" assertion below instead of
    # failing on an unbound local.
    labels = None
    if options.labelsonfirstline:
        with gzutil.GzBytesLines(filename, strip_bom=True) as fh:
            labels_str = next(fh).decode('ascii', 'replace').encode(
                'ascii', 'replace')  # garbage -> '?'
        # NOTE(review): labels_str is the result of .encode(), yet it is
        # handled with str arguments below; that only works where bytes is
        # str (Python 2) -- confirm before running this on Python 3.
        if options.quote_support:
            labels = []
            sep = separator
            # Consume one field per iteration; None means end of line.
            while labels_str is not None:
                if labels_str.startswith((
                        '"',
                        "'",
                )):
                    q = labels_str[0]
                    pos = 1
                    # Look for the closing quote: one that is followed by
                    # the separator or sits at the end of the line.
                    while pos + 1 < len(labels_str):
                        pos = labels_str.find(q, pos)
                        if pos == -1:  # all is lost
                            # No closing quote: treat the last character as one.
                            pos = len(labels_str) - 1
                        if pos + 1 == len(labels_str):  # eol
                            break
                        if labels_str[pos + 1] == sep:
                            break
                        # we'll just assume it was a quote, because what else to do?
                        # Drop this quote character and step past whatever followed it.
                        labels_str = labels_str[:pos] + labels_str[pos + 1:]
                        pos += 1
                    labels.append(labels_str[1:pos])
                    if len(labels_str) > pos + 1:
                        # Skip the closing quote and the separator after it.
                        labels_str = labels_str[pos + 2:]
                    else:
                        labels_str = None
                else:
                    # Unquoted field: everything up to the next separator.
                    if sep in labels_str:
                        field, labels_str = labels_str.split(sep, 1)
                    else:
                        field, labels_str = labels_str, None
                    labels.append(field)
        else:
            labels = labels_str.split(separator)
    labels = options.labels or labels  # only from file if not specified in options
    assert labels, "No labels"
    labels = [options.rename.get(x, x) for x in labels]
    assert '' not in labels, "Empty label for column %d" % (labels.index(''), )
    assert len(labels) == len(set(labels)), "Duplicate labels: %r" % (labels, )

    dw = DatasetWriter(
        columns={n: 'bytes'
                 for n in labels},
        filename=orig_filename,
        hashlabel=options.hashlabel,
        caption='csvimport of ' + orig_filename,
        previous=datasets.previous,
        meta_only=True,
    )

    return separator, filename, orig_filename, labels, dw,
Пример #4
0
	def __init__(self, *a, **kw):
		"""Open a gzutil line reader on self.fh, forwarding all arguments.

		The reader yields unicode lines when PY3 is true, bytes lines
		otherwise.
		"""
		reader_cls = gzutil.GzUnicodeLines if PY3 else gzutil.GzBytesLines
		self.fh = reader_cls(*a, **kw)
Пример #5
0

# Case 0: a file written by hand, with a UTF-8 BOM at the start of both lines.
with open(TMP_FN, "wb") as fh:
    fh.write(b"\xef\xbb\xbfa\n\xef\xbb\xbfb")
test_read_bom(0)
# Case 1: the same content is expected when the writer supplies the leading
# BOM itself (write_bom=True) and only the second line carries one explicitly.
with gzutil.GzWriteUnicodeLines(TMP_FN, write_bom=True) as fh:
    fh.write("a")
    fh.write("\ufeffb")
test_read_bom(1)
# Case 2: an explicit BOM on the first line in addition to write_bom=True,
# so the first line is expected to come back with an extra BOM as prefix.
with gzutil.GzWriteUnicodeLines(TMP_FN, write_bom=True) as fh:
    fh.write("\ufeffa")
    fh.write("\ufeffb")
test_read_bom(2, "\ufeff")
# Without write_bom no BOM may appear in the output.
with gzutil.GzWriteUnicodeLines(TMP_FN) as fh:
    fh.write("a")
# Read back through a context manager so the reader is closed before
# TMP_FN is rewritten below, like every other reader in this script.
with gzutil.GzBytesLines(TMP_FN) as fh:
    assert next(fh) == b"a", "GzWriteUnicodeLines writes BOM when not requested"

print("Append test")
# And finally verify appending works as expected.
with gzutil.GzWriteInt64(TMP_FN) as fh:
    fh.write(42)
with gzutil.GzWriteInt64(TMP_FN, mode="a") as fh:
    fh.write(18)
with gzutil.GzInt64(TMP_FN) as fh:
    assert list(fh) == [42, 18]

print("Untyped writer test")
with gzutil.GzWrite(TMP_FN) as fh:

    class SubString(bytes):
        pass