def testSequenceRecordWithCompression(self):
    """Round-trip a vector through a GZIP-compressed sequence record file.

    A 2x2 float32 matrix is written with ``create_sequence_records`` using
    ``compression="GZIP"``, then read back through a
    ``SequenceRecordInputter`` inference dataset with a batch size of 1.
    The first batch element's ``"tensor"`` entry must equal the matrix.
    """
    expected = np.array([[0.2, 0.3], [0.4, 0.5]], dtype=np.float32)
    requested_path = os.path.join(self.get_temp_dir(), "data.records")

    # The value returned by the writer (not the requested path) is what gets
    # read back below.
    # NOTE(review): presumably the writer adjusts the path for compressed
    # output -- confirm against create_sequence_records.
    written_path = record_inputter.create_sequence_records(
        [expected], requested_path, compression="GZIP"
    )

    # NOTE(review): the argument 2 presumably is the input depth, matching
    # the last dimension of the vector -- confirm.
    reader = record_inputter.SequenceRecordInputter(2)
    batches = reader.make_inference_dataset(written_path, batch_size=1)

    first_batch = next(iter(batches))
    self.assertAllEqual(first_batch["tensor"].numpy()[0], expected)
def testWordEmbedderWithCompression(self):
    """Check that ``WordEmbedder`` reads a data file made with ``compress=True``.

    A vocabulary file and a two-line data file are created through
    ``self._makeTextFile``; the data file is requested with
    ``compress=True``. The first batch of the inference dataset
    (batch size 1) must expose the tokens of the first line as byte strings.
    """
    vocabulary_path = self._makeTextFile(
        "vocab.txt", ["the", "world", "hello", "■"]
    )
    compressed_path = self._makeTextFile(
        "data.txt", ["hello world !", "how are you ?"], compress=True
    )

    embedder = text_inputter.WordEmbedder(embedding_size=10)
    embedder.initialize({"vocabulary": vocabulary_path})

    batches = embedder.make_inference_dataset(compressed_path, batch_size=1)
    first_batch = next(iter(batches))

    expected_tokens = [b"hello", b"world", b"!"]
    self.assertAllEqual(first_batch["tokens"].numpy()[0], expected_tokens)