def pipe_iterator(path, size=1):
    """Yield record chunks from the numbered files ``<path>_0``, ``<path>_1``, ...

    Each file is opened in binary mode, decoded with ``read_recordio`` and
    regrouped by ``chunk_iterable`` before being handed to the caller.

    Args:
        path: Prefix of the files to read; the running index is appended
            after an underscore.
        size: Forwarded to ``chunk_iterable`` as the chunk size.

    Yields:
        Whatever ``chunk_iterable(..., size, last='error')`` produces.

    Note:
        The loop has no exit condition of its own: once a file is exhausted
        the next index is opened, so iteration only ends when ``open`` (or one
        of the readers) raises.
    """
    index = 0
    while True:
        pipe_name = "{}_{}".format(path, index)
        with open(pipe_name, 'rb') as stream:
            records = read_recordio(stream)
            for chunk in chunk_iterable(records, size, last='error'):
                yield chunk
        # Only move on to the next numbered file after this one is drained.
        index += 1
def test_serializer():
    """Serialize a 2-D float array and check every row round-trips.

    The buffer returned by ``RecordSerializer.serialize`` is read back with
    ``read_recordio``; each record is parsed into a ``Record`` and its
    ``float64_tensor`` values are compared with the matching input row.
    """
    array_data = [[1.0, 2.0, 3.0], [10.0, 20.0, 30.0]]
    s = RecordSerializer()
    buf = s.serialize(np.array(array_data))

    records = list(read_recordio(buf))
    # zip() stops at the shortest input, so without this check the test would
    # pass even if the serializer produced too few (or zero) records.
    assert len(records) == len(array_data)

    for record_data, expected in zip(records, array_data):
        record = Record()
        record.ParseFromString(record_data)
        assert record.features["values"].float64_tensor.values == expected
def test_serializer_accepts_one_dimensional_array():
    """Serialize a 1-D float array and check the first record holds its values.

    Only the first record read back from the buffer is inspected; ``next``
    raises if the serializer produced no record at all.
    """
    values = [1.0, 2.0, 3.0]
    serializer = RecordSerializer()
    stream = serializer.serialize(np.array(values))

    first_record = next(read_recordio(stream))
    parsed = Record()
    parsed.ParseFromString(first_record)

    tensor = parsed.features["values"].float64_tensor
    assert tensor.values == values
def read_pipe(pipe):
    """Print every record found in ``pipe``, first raw and then parsed.

    Args:
        pipe: Path of the file to open in binary mode and read with
            ``read_recordio``.
    """
    with open(pipe, 'rb') as stream:
        for raw in read_recordio(stream):
            # Show the serialized bytes before attempting to parse them, so
            # the raw payload is visible even if parsing fails.
            print("read record")
            print(raw)
            parsed = Record()
            parsed.ParseFromString(raw)
            print("record parsed")
            print(parsed)
def test_int_write_numpy_to_dense_tensor():
    """Write an integer matrix as dense tensors and check each row round-trips.

    The matrix is written to a temporary file with
    ``write_numpy_to_dense_tensor``, read back with ``read_recordio`` and each
    record's ``int32_tensor`` values are compared with the matching input row.
    """
    array_data = [[1, 2, 3], [10, 20, 3]]
    array = np.array(array_data)
    with tempfile.TemporaryFile() as f:
        write_numpy_to_dense_tensor(f, array)
        f.seek(0)  # rewind so the records just written can be read back

        records = list(read_recordio(f))
        # zip() stops at the shortest input, so without this check the test
        # would pass even if too few (or zero) records were written.
        assert len(records) == len(array_data)

        for record_data, expected in zip(records, array_data):
            record = Record()
            record.ParseFromString(record_data)
            assert record.features["values"].int32_tensor.values == expected
def test_float32_write_numpy_to_dense_tensor():
    """Write a float32 matrix as dense tensors and check each row round-trips.

    The matrix is written to a temporary file with
    ``write_numpy_to_dense_tensor``, read back with ``read_recordio`` and each
    record's ``float32_tensor`` values are compared with the matching input
    row.
    """
    array_data = [[1.0, 2.0, 3.0], [10.0, 20.0, 30.0]]
    array = np.array(array_data).astype(np.dtype("float32"))
    with tempfile.TemporaryFile() as f:
        write_numpy_to_dense_tensor(f, array)
        f.seek(0)  # rewind so the records just written can be read back

        records = list(read_recordio(f))
        # zip() stops at the shortest input, so without this check the test
        # would pass even if too few (or zero) records were written.
        assert len(records) == len(array_data)

        for record_data, expected in zip(records, array_data):
            record = Record()
            record.ParseFromString(record_data)
            assert record.features["values"].float32_tensor.values == expected
def test_float_label():
    """Write an integer matrix with float64 labels and check both round-trip.

    Each record read back must carry the matching row in
    ``features["values"].int32_tensor`` and the matching label, as a
    one-element list, in ``label["values"].float64_tensor``.
    """
    array_data = [[1, 2, 3], [10, 20, 3]]
    array = np.array(array_data)
    # NOTE: label_data holds three entries while the matrix has two rows; the
    # zip() below therefore compares at most the first two labels.
    label_data = np.array([99, 98, 97]).astype(np.dtype("float64"))
    with tempfile.TemporaryFile() as f:
        write_numpy_to_dense_tensor(f, array, label_data)
        f.seek(0)  # rewind so the records just written can be read back

        records = list(read_recordio(f))
        # zip() stops at the shortest input, so without this check the test
        # would pass even if too few (or zero) records were written.
        assert len(records) == len(array_data)

        for record_data, expected, label in zip(records, array_data, label_data):
            record = Record()
            record.ParseFromString(record_data)
            assert record.features["values"].int32_tensor.values == expected
            assert record.label["values"].float64_tensor.values == [label]
def read_pipe(pipe):
    """Print a summary of the (label, audio, audio) triples in five pipe files.

    The files ``<pipe>_0`` .. ``<pipe>_4`` are opened in turn. Records are
    grouped three at a time by ``chunk_iterable``: the first is decoded as a
    UTF-8 integer label, the other two are parsed with ``wavfile.read``.

    Args:
        pipe: Prefix of the files to read; ``_<index>`` is appended to it.
    """
    for index in range(5):
        pipe_name = pipe + "_{}".format(index)
        with open(pipe_name, 'rb') as stream:
            print("opened pipe {}".format(index))
            triples = chunk_iterable(read_recordio(stream), 3)
            # The running number restarts at zero for every file.
            for count, (raw_label, wav1, wav2) in enumerate(triples):
                label = int(raw_label.decode('utf-8'))
                fs1, aud1 = wavfile.read(BytesIO(wav1))
                fs2, aud2 = wavfile.read(BytesIO(wav2))
                print("{} label: {}".format(count, label))
                print("audio1: {},{}".format(fs1, aud1.shape))
                print("audio2: {},{}".format(fs2, aud2.shape))
def test_dense_int_write_spmatrix_to_sparse_tensor():
    """Write a fully populated integer COO matrix and check each row's record.

    For every record read back, the ``int32_tensor`` must hold the row's
    values, the column indices ``0..2`` as keys, and a shape equal to the row
    length.
    """
    array_data = [[1.0, 2.0, 3.0], [10.0, 20.0, 30.0]]
    keys_data = [[0, 1, 2], [0, 1, 2]]
    array = coo_matrix(np.array(array_data).astype(np.dtype('int')))
    with tempfile.TemporaryFile() as f:
        write_spmatrix_to_sparse_tensor(f, array)
        f.seek(0)  # rewind so the records just written can be read back

        records = list(read_recordio(f))
        # zip() stops at the shortest input, so without this check the test
        # would pass even if too few (or zero) records were written.
        assert len(records) == len(array_data)

        for record_data, expected_data, expected_keys in zip(records, array_data, keys_data):
            record = Record()
            record.ParseFromString(record_data)
            tensor = record.features["values"].int32_tensor
            assert tensor.values == expected_data
            assert tensor.keys == expected_keys
            assert tensor.shape == [len(expected_data)]
def test_dense_float64_spmatrix_to_sparse_label():
    """Write a float64 COO matrix with integer labels and check each record.

    For every record read back, the ``float64_tensor`` must hold the row's
    values, keys and shape, and ``label["values"].int32_tensor`` must hold the
    matching label as a one-element list.
    """
    array_data = [[1, 2, 3], [10, 20, 3]]
    keys_data = [[0, 1, 2], [0, 1, 2]]
    array = coo_matrix(np.array(array_data).astype("float64"))
    # NOTE: label_data holds three entries while the matrix has two rows; the
    # zip() below therefore compares at most the first two labels.
    label_data = np.array([99, 98, 97])
    with tempfile.TemporaryFile() as f:
        write_spmatrix_to_sparse_tensor(f, array, label_data)
        f.seek(0)  # rewind so the records just written can be read back

        records = list(read_recordio(f))
        # zip() stops at the shortest input, so without this check the test
        # would pass even if too few (or zero) records were written.
        assert len(records) == len(array_data)

        for record_data, expected_data, expected_keys, label in zip(
            records, array_data, keys_data, label_data
        ):
            record = Record()
            record.ParseFromString(record_data)
            tensor = record.features["values"].float64_tensor
            assert tensor.values == expected_data
            assert tensor.keys == expected_keys
            assert record.label["values"].int32_tensor.values == [label]
            assert tensor.shape == [len(expected_data)]
def __iter__(self):
    """Yield record chunks from the next numbered file of this worker's path.

    The path is picked from ``self.paths`` using the id reported by
    ``data.get_worker_info()`` (presumably ``torch.utils.data`` — confirm
    against the file's imports). When no worker info is available there must
    be exactly one path. ``self.counts`` tracks, per path, which numbered file
    ``<path>_<n>`` is opened next, so each new iteration advances to the
    following file.

    Yields:
        Whatever ``chunk_iterable(..., self.size, last='error')`` produces.
    """
    worker_info = data.get_worker_info()
    worker_id = worker_info.id if worker_info else None

    if worker_id is not None:
        assert worker_id >= 0
        assert worker_id < len(self.paths)
        path = self.paths[worker_id]
    else:
        assert len(self.paths) == 1
        path = self.paths[0]

    # First time this path is seen: start from file number zero.
    self.counts.setdefault(path, 0)

    pipe_name = "{}_{}".format(path, self.counts[path])
    with open(pipe_name, 'rb') as stream:
        # Bump the counter only once the file opened successfully, so a failed
        # open is retried with the same number.
        self.counts[path] += 1
        for chunk in chunk_iterable(read_recordio(stream), self.size, last='error'):
            yield chunk
def test_sparse_int_write_spmatrix_to_sparse_tensor():
    """Write a genuinely sparse integer COO matrix and check each row's record.

    The matrix is built from per-row value and column-index lists. For every
    record read back, the ``int32_tensor`` must hold that row's values and
    keys, and a shape of ``[n]``.
    """
    n = 4
    array_data = [[1.0, 2.0], [10.0, 30.0], [100.0, 200.0, 300.0, 400.0], [1000.0, 2000.0, 3000.0]]
    keys_data = [[0, 1], [1, 2], [0, 1, 2, 3], [0, 2, 3]]

    # Flatten the per-row lists into the (data, (row, col)) triplet form that
    # coo_matrix accepts.
    flatten_data = list(itertools.chain.from_iterable(array_data))
    y_indices = list(itertools.chain.from_iterable(keys_data))
    # Repeat each row index once per stored entry in that row.
    x_indices = [[row] * len(keys) for row, keys in enumerate(keys_data)]
    x_indices = list(itertools.chain.from_iterable(x_indices))

    array = coo_matrix((flatten_data, (x_indices, y_indices)), dtype='int')
    with tempfile.TemporaryFile() as f:
        write_spmatrix_to_sparse_tensor(f, array)
        f.seek(0)  # rewind so the records just written can be read back

        records = list(read_recordio(f))
        # zip() stops at the shortest input, so without this check the test
        # would pass even if too few (or zero) records were written.
        assert len(records) == len(array_data)

        for record_data, expected_data, expected_keys in zip(records, array_data, keys_data):
            record = Record()
            record.ParseFromString(record_data)
            tensor = record.features["values"].int32_tensor
            assert tensor.values == expected_data
            assert tensor.keys == expected_keys
            assert tensor.shape == [n]