Example #1
    def test_round_trip(self):
        '''Can round-trip content.'''
        content = b'This is some test content.'
        with scratch_file('example.gz') as path:
            with gzippy.open(path, 'wb') as fout:
                fout.write(content)

            with gzippy.open(path, 'rb') as fin:
                self.assertEqual(fin.read(), content)
Example #2
    def test_incremental_reads(self):
        '''Incremental reads'''
        content = b'This is some test content.'
        with scratch_file('example.gz') as path:
            with gzippy.open(path, 'wb') as fout:
                fout.write(content)

            with gzippy.open(path, 'rb') as fin:
                self.assertEqual(fin.read(10), content[:10])
Example #3
    def test_lines(self):
        '''Can read the file line by line.'''
        parts = [b'some\n', b'lines\n', b'in\n', b'a\n', b'file']
        content = b''.join(parts)
        with scratch_file('example.gz') as path:
            with gzippy.open(path, 'wb') as fout:
                fout.write(content)

            with gzippy.open(path) as fin:
                self.assertEqual(list(fin), parts)
Example #4
    def test_unsupported_incremental_reads(self):
        '''Incremental reads are as-of-yet unsupported.'''
        content = b'This is some test content.'
        with scratch_file('example.gz') as path:
            with gzippy.open(path, 'wb') as fout:
                fout.write(content)

            with self.assertRaises(IOError):
                with gzippy.open(path, 'rb') as fin:
                    fin.read(10)
Example #5
    def test_lines_consolidation(self):
        '''Consolidates lines across multiple chunks.'''
        parts = [b'some\n', b'lines\n', b'in\n', b'a\n', b'file']
        chunks = [b'so', b'm', b'e\nlines\n', b'i', b'n', b'\n', b'a\nfile']
        content = b''.join(chunks)
        with scratch_file('example.gz') as path:
            with gzippy.open(path, 'wb') as fout:
                fout.write(content)

            with gzippy.open(path) as fin:
                self.assertEqual(list(fin), parts)
Example #6
    def test_lines_consolidation(self):
        '''Consolidates lines across multiple chunks.'''
        parts = [b'some\n', b'lines\n', b'in\n', b'a\n', b'file']
        chunks = [b'so', b'm', b'e\nlines\n', b'i', b'n', b'\n', b'a\nfile']
        content = b''.join(chunks)
        with scratch_file('example.gz') as path:
            with gzippy.open(path, 'wb') as fout:
                fout.write(content)

            with gzippy.open(path) as fin:
                with mock.patch.object(fin, 'chunks', iter(chunks)):
                    self.assertEqual(list(fin), parts)
Example #7
    def test_reader_size_mismatch(self):
        '''Raises an exception when the size doesn't match.'''
        with scratch_file('example.gz') as path:
            with gzippy.open(path, 'wb') as fout:
                fout.write(b'This is some test content.')

            # Corrupt the gzip trailer's size field by rewriting the last four bytes
            with open(path, 'r+b') as fout:
                fout.seek(-4, 2)
                fout.write(b'\xFF' * 4)

            with self.assertRaises(IOError):
                with gzippy.open(path) as fin:
                    fin.read()
Example #8
def load_data():
    """Return the MNIST data as a tuple containing the training data,
    the validation data, and the test data.

    The ``training_data`` is returned as a tuple with two entries.
    The first entry contains the actual training images.  This is a
    numpy ndarray with 50,000 entries.  Each entry is, in turn, a
    numpy ndarray with 784 values, representing the 28 * 28 = 784
    pixels in a single MNIST image.

    The second entry in the ``training_data`` tuple is a numpy ndarray
    containing 50,000 entries.  Those entries are just the digit
    values (0...9) for the corresponding images contained in the first
    entry of the tuple.

    The ``validation_data`` and ``test_data`` are similar, except
    each contains only 10,000 images.

    This is a nice data format, but for use in neural networks it's
    helpful to modify the format of the ``training_data`` a little.
    That's done in the wrapper function ``load_data_wrapper()``, see
    below.
    """
    f = gzip.open('../data/mnist.pkl.gz', 'rb')
    training_data, validation_data, test_data = cPickle.load(f)
    f.close()
    return (training_data, validation_data, test_data)
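
A quick usage sketch to sanity-check the shapes described in the docstring (it assumes mnist.pkl.gz is present at the path load_data expects):

shapes = load_data()
training_data, validation_data, test_data = shapes
print(training_data[0].shape)    # (50000, 784): 784 pixels per image
print(training_data[1].shape)    # (50000,): digit labels 0...9
print(validation_data[0].shape)  # (10000, 784): the smaller split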
Example #9
    def test_open_with_append(self):
        '''Opening in append mode is not allowed.'''
        with scratch_file('example.gz') as path:
            with open(path, 'w+') as fout:
                pass

            with self.assertRaises(ValueError):
                with gzippy.open(path, 'ab') as fout:
                    pass
Example #10
    def test_open_with_plus(self):
        '''Opening with r+ is not allowed.'''
        with scratch_file('example.gz') as path:
            with open(path, 'w+') as fout:
                pass

            with self.assertRaises(ValueError):
                with gzippy.open(path, 'r+') as fin:
                    pass
Example #11
import time
import gzip

import gzippy


def timer(name, runs):
    '''Time `runs` iterations of the loop body and print the average.'''
    print(name)
    times = []
    for run in range(runs):
        start = -time.time()
        yield run
        start += time.time()
        times.append(start)

    print('Average of %s runs: %s\n' % (runs, sum(times) / len(times)))


split = [b'This is some example content'] * 100000
joined = b'\n'.join(split)

for _ in timer('Gzip single write', 5):
    with gzip.open('example.gz', 'wb') as fout:
        fout.write(joined)

for _ in timer('Gzippy single write', 5):
    with gzippy.open('example.gz', 'wb') as fout:
        fout.write(joined)

for _ in timer('Gzip with repeated writes', 5):
    with gzip.open('example.gz', 'wb') as fout:
        for element in split:
            fout.write(element)

for _ in timer('Gzippy with repeated writes', 5):
    with gzippy.open('example.gz', 'wb') as fout:
        for element in split:
            fout.write(element)

# Create an example file for the read benchmarks
with gzip.open('example.gz', 'wb') as fout:
    fout.write(joined)
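
The write above sets up a file to read back; a minimal sketch of that comparison, reusing the timer helper (the label strings are illustrative, not from the original script):

for _ in timer('Gzip read', 5):
    with gzip.open('example.gz', 'rb') as fin:
        fin.read()

for _ in timer('Gzippy read', 5):
    with gzippy.open('example.gz', 'rb') as fin:
        fin.read()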