def readline_block_boundary(self):
    kwargs = {}
    if pydoop.hadoop_version_info().has_deprecated_bs():
        # blocksize can no longer be passed explicitly: use the default
        bs = hdfs.fs.hdfs().default_block_size()
    else:
        bs = u.get_bytes_per_checksum()
        kwargs["blocksize"] = bs
    line = "012345678\n"
    path = self._make_random_path()
    # write whole lines until the data extends past the block boundary
    with self.fs.open_file(path, flags="w", **kwargs) as f:
        bytes_written = lines_written = 0
        while bytes_written < bs + 1:
            f.write(line)
            lines_written += 1
            bytes_written += len(line)
    # read the file back line by line: every line must come back intact
    with self.fs.open_file(path) as f:
        lines = []
        while True:
            l = f.readline()
            if l == "":  # EOF
                break
            lines.append(l)
    self.assertEqual(len(lines), lines_written)
    for i, l in enumerate(lines):
        self.assertEqual(l, line, "line %d: %r != %r" % (i, l, line))
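
# A minimal standalone sketch (not part of the test suite; values are
# hypothetical) of the write loop's invariant above: appending whole lines
# until the total exceeds bs guarantees the data spans the block boundary.
bs = 512  # stand-in for one checksum chunk; the real value comes from HDFS
line = "012345678\n"
bytes_written = lines_written = 0
while bytes_written < bs + 1:
    lines_written += 1
    bytes_written += len(line)
assert bytes_written > bs  # the written data crosses the boundary
assert lines_written == -(-(bs + 1) // len(line))  # ceil((bs + 1) / len(line))
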
def block_boundary(self):
    path = self._make_random_path()
    CHUNK_SIZE = 10
    N = 2
    kwargs = {}
    if pydoop.hadoop_version_info().has_deprecated_bs():
        # blocksize can no longer be passed explicitly: use the default
        bs = hdfs.fs.hdfs().default_block_size()
    else:
        bs = N * get_bytes_per_checksum()
        kwargs['blocksize'] = bs
    total_data_size = 2 * bs
    with self.fs.open_file(path, "w", **kwargs) as f:
        data = make_random_data(total_data_size)
        i = 0
        bufsize = hdfs.common.BUFSIZE
        while i < len(data):
            f.write(data[i:i + bufsize])
            i += bufsize
    with self.fs.open_file(path) as f:
        # read CHUNK_SIZE bytes at positions around the block boundary (bs)
        # and around the last full chunk (p), checking the returned length
        p = total_data_size - CHUNK_SIZE
        for pos in 0, 1, bs - 1, bs, bs + 1, p - 1, p, p + 1, total_data_size - 1:
            expected_len = CHUNK_SIZE if pos <= p else total_data_size - pos
            f.seek(pos)
            chunk = f.read(CHUNK_SIZE)
            self.assertEqual(len(chunk), expected_len)
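
# Illustrative helper (hypothetical, not part of the test suite): the
# expected-length rule checked above, written as a standalone function.
# A read of chunk_size bytes returns chunk_size bytes unless fewer remain
# before EOF, which is what the pos <= p branch encodes.
def expected_read_len(pos, chunk_size, total_size):
    # equivalent to: chunk_size if pos <= total_size - chunk_size
    #                else total_size - pos
    return min(chunk_size, total_size - pos)

assert expected_read_len(0, 10, 40) == 10   # well inside the file
assert expected_read_len(30, 10, 40) == 10  # exactly the last full chunk
assert expected_read_len(35, 10, 40) == 5   # only 5 bytes left before EOF
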
def block_boundary(self):
    path = self._make_random_path()
    CHUNK_SIZE = 10
    N = 2
    bs = N * get_bytes_per_checksum()
    total_data_size = 2 * bs
    with self.fs.open_file(path, "w", blocksize=bs) as f:
        f.write(make_random_data(total_data_size))
    with self.fs.open_file(path) as f:
        p = total_data_size - CHUNK_SIZE
        for pos in 0, 1, bs - 1, bs, bs + 1, p - 1, p, p + 1, total_data_size - 1:
            expected_len = CHUNK_SIZE if pos <= p else total_data_size - pos
            f.seek(pos)
            chunk = f.read(CHUNK_SIZE)
            self.assertEqual(len(chunk), expected_len)

def readline_block_boundary(self):
    bs = u.get_bytes_per_checksum()
    line = "012345678\n"
    path = self._make_random_path()
    with self.fs.open_file(path, flags="w", blocksize=bs) as f:
        bytes_written = lines_written = 0
        while bytes_written < bs + 1:
            f.write(line)
            lines_written += 1
            bytes_written += len(line)
    with self.fs.open_file(path) as f:
        lines = []
        while True:
            l = f.readline()
            if l == "":  # EOF
                break
            lines.append(l)
    self.assertEqual(len(lines), lines_written)
    for i, l in enumerate(lines):
        self.assertEqual(l, line, "line %d: %r != %r" % (i, l, line))
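
# Standalone sketch of the readline loop above, exercised against an ordinary
# in-memory file via io.StringIO instead of HDFS (illustration only): readline
# returns "" at EOF, so looping until the empty string collects every line.
import io

line = "012345678\n"
buf = io.StringIO(line * 5)
lines = []
while True:
    l = buf.readline()
    if l == "":  # EOF
        break
    lines.append(l)
assert lines == [line] * 5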