def list_experiments(api_client, fieldmask=None, read_time=None):
    """Streams every experiment that belongs to the calling user.

    Args:
      api_client: A TensorBoardExporterService stub instance.
      fieldmask: An optional `experiment_pb2.ExperimentMask` value; when
        truthy it is copied into the request's `experiments_mask`.
      read_time: A fixed timestamp from which to export data, as float
        seconds since epoch (like `time.time()`). Optional; defaults to
        the current time.

    Yields:
      For each experiment owned by the user, an
      `experiment_pb2.Experiment` value, or a simple string experiment
      ID for older servers.
    """
    effective_read_time = time.time() if read_time is None else read_time
    request = export_service_pb2.StreamExperimentsRequest(limit=_MAX_INT64)
    util.set_timestamp(request.read_timestamp, effective_read_time)
    if fieldmask:
        request.experiments_mask.CopyFrom(fieldmask)
    responses = api_client.StreamExperiments(
        request, metadata=grpc_util.version_metadata()
    )
    for response in responses:
        # Prefer the `experiments` field; fall back to the plain IDs that
        # old servers send when `experiments` is empty.
        batch = response.experiments or response.experiment_ids
        for item in batch:
            yield item
def stream_experiment_data(request, **kwargs):
    """Test stand-in for the `StreamExperimentData` RPC of experiment "789".

    Yields one response per (run, tag) pair, each carrying ten scalar
    points. `self` is the enclosing test case (captured from the closure).
    """
    self.assertEqual(request.experiment_id, "789")
    self.assertEqual(kwargs["metadata"], grpc_util.version_metadata())
    for run in ("train", "test"):
        is_train = run == "train"
        # "train" values are positive and start at t=0; "test" values are
        # negated and start at t=600.
        numerator = 1.0 if is_train else -1.0
        base_seconds = 0 if is_train else 600
        for tag in ("accuracy", "loss"):
            response = export_service_pb2.StreamExperimentDataResponse()
            response.run_name = run
            response.tag_name = tag
            response.tag_metadata.CopyFrom(
                test_util.scalar_metadata(
                    "%s:%s" % (request.experiment_id, tag)
                )
            )
            for step in range(10):
                if tag == "loss":
                    denominator = step + 1
                    elapsed = step
                else:  # "accuracy"
                    denominator = 10 - step
                    elapsed = step * 2
                response.points.steps.append(step)
                response.points.values.append(numerator / denominator)
                response.points.wall_times.add(
                    seconds=base_seconds + elapsed, nanos=0
                )
            yield response
def stream_experiments(request, **kwargs):
    """Test stand-in for `StreamExperiments`: one response with experiment "123".

    `self` is the enclosing test case (captured from the closure).
    """
    del request  # unused
    self.assertEqual(kwargs["metadata"], grpc_util.version_metadata())
    only_response = export_service_pb2.StreamExperimentsResponse()
    only_response.experiments.add(experiment_id="123")
    yield only_response
def _request_scalar_data(self, experiment_id, read_time):
    """Yields JSON-serializable blocks of scalar data.

    Each yielded dict holds the run name, tag name, base64-encoded
    serialized tag metadata, and the points (steps, wall times in float
    seconds, values) of one `StreamExperimentData` response.
    """
    request = export_service_pb2.StreamExperimentDataRequest()
    request.experiment_id = experiment_id
    util.set_timestamp(request.read_timestamp, read_time)
    # No special error handling as we don't expect any errors from these
    # calls: all experiments should exist (read consistency timestamp)
    # and be owned by the calling user (only queried for own experiment
    # IDs). Any non-transient errors would be internal, and we have no
    # way to efficiently resume from transient errors because the server
    # does not support pagination.
    responses = self._api.StreamExperimentData(
        request, metadata=grpc_util.version_metadata()
    )
    for response in responses:
        serialized_metadata = response.tag_metadata.SerializeToString()
        encoded_metadata = base64.b64encode(serialized_metadata).decode(
            "ascii"
        )
        points = response.points
        # Wall times arrive as timestamp protos; export float seconds.
        seconds = [ts.ToNanoseconds() / 1e9 for ts in points.wall_times]
        yield {
            u"run": response.run_name,
            u"tag": response.tag_name,
            u"summary_metadata": encoded_metadata,
            u"points": {
                u"steps": list(points.steps),
                u"wall_times": seconds,
                u"values": list(points.values),
            },
        }
def stream_experiment_data(request, **kwargs):
    """Test stand-in for `StreamExperimentData` that yields tensor data.

    Produces one response per (run, tag) pair with two tensor points each:
    numeric [3, 2] arrays for the dense tags and string arrays of shape
    [3] for "text/test". `self` is the enclosing test case (closure).
    """
    self.assertEqual(kwargs["metadata"], grpc_util.version_metadata())
    for run in ("train_1", "train_2"):
        # The two runs differ only in the nanosecond part of wall time.
        nanos = 862939144 if run == "train_1" else 962939144
        for tag in ("dense_1/kernel", "dense_1/bias", "text/test"):
            response = export_service_pb2.StreamExperimentDataResponse()
            response.run_name = run
            response.tag_name = tag
            response.tag_metadata.CopyFrom(
                test_util.scalar_metadata(
                    "%s:%s" % (request.experiment_id, tag)
                )
            )
            tensors = response.tensors
            for step in range(2):
                tensors.steps.append(step)
                tensors.wall_times.add(
                    seconds=1571084520 + step, nanos=nanos
                )
                if tag == "text/test":
                    array = np.full([3], "a" * (step + 1))
                else:
                    array = np.ones([3, 2]) * step
                tensors.values.append(tensor_util.make_tensor_proto(array))
            yield response
def stream_experiments(request, **kwargs):
    """Test stand-in for `StreamExperiments` using the legacy ID-only field.

    Yields two responses: IDs "123" and "456", then ID "789". `self` is
    the enclosing test case (captured from the closure).
    """
    del request  # unused
    self.assertEqual(kwargs["metadata"], grpc_util.version_metadata())
    for id_batch in (["123", "456"], ["789"]):
        yield export_service_pb2.StreamExperimentsResponse(
            experiment_ids=id_batch
        )
def _request_json_data(self, experiment_id, read_time):
    """Generates JSON data and destination file names for an experiment.

    The JSON data describes the run, tag and metadata, in addition to
      - Actual data in the case of scalars
      - Pointer to binary files in the case of blob sequences.

    For the case of blob sequences, this method has the side effect of
    downloading the contents of the blobs and writing them to files in
    a subdirectory of the experiment directory.

    Args:
      experiment_id: The id of the experiment to request data for.
      read_time: A fixed timestamp from which to export data, as float
        seconds since epoch (like `time.time()`).

    Yields:
      (JSON-serializable data, destination file name) tuples. Responses
      carrying none of `points`, `tensors` or `blob_sequences` yield
      nothing.
    """
    request = export_service_pb2.StreamExperimentDataRequest()
    request.experiment_id = experiment_id
    util.set_timestamp(request.read_timestamp, read_time)
    # No special error handling as we don't expect any errors from these
    # calls: all experiments should exist (read consistency timestamp)
    # and be owned by the calling user (only queried for own experiment
    # IDs). Any non-transient errors would be internal, and we have no
    # way to efficiently resume from transient errors because the server
    # does not support pagination.
    responses = self._api.StreamExperimentData(
        request, metadata=grpc_util.version_metadata()
    )
    for response in responses:
        serialized_metadata = response.tag_metadata.SerializeToString()
        encoded_metadata = base64.b64encode(serialized_metadata).decode(
            "ascii"
        )
        filename = None
        points = None
        # Exactly one payload kind is handled per response; the kind
        # decides both the point encoding and the destination file.
        if response.HasField("points"):
            points = self._process_scalar_points(response.points)
            filename = _FILENAME_SCALARS
        elif response.HasField("tensors"):
            points = self._process_tensor_points(
                response.tensors, experiment_id
            )
            filename = _FILENAME_TENSORS
        elif response.HasField("blob_sequences"):
            points = self._process_blob_sequence_points(
                response.blob_sequences, experiment_id
            )
            filename = _FILENAME_BLOB_SEQUENCES
        if not filename:
            continue
        json_data = {
            u"run": response.run_name,
            u"tag": response.tag_name,
            u"summary_metadata": encoded_metadata,
            u"points": points,
        }
        yield json_data, filename
def get_scalars(
    self,
    runs_filter=None,
    tags_filter=None,
    pivot=False,
    include_wall_time=False,
):
    """Downloads the experiment's scalar data into a pandas DataFrame.

    Args:
      runs_filter: Not supported yet; must be `None`.
      tags_filter: Not supported yet; must be `None`.
      pivot: If truthy, the frame is passed through
        `self._pivot_dataframe` before being returned.
      include_wall_time: If truthy, a "wall_time" column (float seconds)
        is included.

    Returns:
      A DataFrame with columns "run", "tag", "step", "value" and,
      optionally, "wall_time" (possibly pivoted).

    Raises:
      NotImplementedError: If `runs_filter` or `tags_filter` is given.
    """
    # NOTE(#3650): Import pandas early in this method, so if the
    # Python environment does not have pandas installed, an error can be
    # raised early, before any rpc call is made.
    pandas = import_pandas()
    if runs_filter is not None:
        raise NotImplementedError(
            "runs_filter support for get_scalars() is not implemented yet."
        )
    if tags_filter is not None:
        raise NotImplementedError(
            "tags_filter support for get_scalars() is not implemented yet."
        )

    request = export_service_pb2.StreamExperimentDataRequest()
    request.experiment_id = self._experiment_id
    util.set_timestamp(request.read_timestamp, time.time())
    # TODO(cais, wchargin): Use another rpc to check for staleness and avoid
    # a new StreamExperimentData rpc request if data is not stale.
    responses = self._api_client.StreamExperimentData(
        request, metadata=grpc_util.version_metadata()
    )

    columns = {"run": [], "tag": [], "step": [], "value": []}
    wall_times = []
    for response in responses:
        # TODO(cais, wchargin): Display progress bar during data loading.
        points = response.points
        count = len(points.values)
        columns["run"].extend([response.run_name] * count)
        columns["tag"].extend([response.tag_name] * count)
        columns["step"].extend(points.steps)
        wall_times.extend(
            ts.ToNanoseconds() / 1e9 for ts in points.wall_times
        )
        columns["value"].extend(points.values)

    if include_wall_time:
        columns["wall_time"] = wall_times
    dataframe = pandas.DataFrame(columns)
    if pivot:
        dataframe = self._pivot_dataframe(dataframe)
    return dataframe
def test_structure(self):
    """`version_metadata()` is a tuple of 2-tuples whose items are `str`."""
    metadata = grpc_util.version_metadata()
    self.assertIsInstance(metadata, tuple)
    for entry in metadata:
        self.assertIsInstance(entry, tuple)
        self.assertLen(entry, 2)
        # Both the key and the value must be plain strings.
        for element in entry:
            self.assertIsInstance(element, str)
def _request_experiment_ids(self, read_time):
    """Yields all of the calling user's experiment IDs, as strings.

    Args:
      read_time: Timestamp written into the request's `read_timestamp`
        via `util.set_timestamp`.
    """
    request = export_service_pb2.StreamExperimentsRequest(limit=_MAX_INT64)
    util.set_timestamp(request.read_timestamp, read_time)
    responses = self._api.StreamExperiments(
        request, metadata=grpc_util.version_metadata()
    )
    for batch in responses:
        for experiment_id in batch.experiment_ids:
            yield experiment_id
def stream_experiment_data(request, **kwargs):
    """Test stand-in for `StreamExperimentData` yielding non-finite scalars.

    Emits a single "train"/"batch_loss" response whose two points hold
    NaN and infinity. `self` is the enclosing test case (closure).
    """
    self.assertEqual(request.experiment_id, "789")
    self.assertEqual(kwargs["metadata"], grpc_util.version_metadata())
    response = export_service_pb2.StreamExperimentDataResponse()
    response.run_name = "train"
    response.tag_name = "batch_loss"
    # (step, value, wall-time seconds) for each point.
    for step, value, seconds in ((0, np.nan, 0), (1, np.inf, 10)):
        response.points.steps.append(step)
        response.points.values.append(value)
        response.points.wall_times.add(seconds=seconds, nanos=0)
    yield response
def test_call_with_retries_includes_version_metadata(self):
    """`call_with_retries` sends the client version metadata to the server.

    The handler echoes a 32-bit digest of the version it extracts from
    the invocation metadata; the test compares it with the digest of the
    locally computed version.
    """

    def digest(s):
        """Hashes a string into a 32-bit integer."""
        hex_hash = hashlib.sha256(s.encode("utf-8")).hexdigest()
        return int(hex_hash, 16) & 0xFFFFFFFF

    def handler(request, context):
        received_version = grpc_util.extract_version(
            context.invocation_metadata()
        )
        return make_response(digest(received_version))

    server = TestGrpcServer(handler)
    with server.run() as client:
        response = grpc_util.call_with_retries(
            client.TestRpc, make_request(0)
        )
    local_version = grpc_util.extract_version(grpc_util.version_metadata())
    self.assertEqual(make_response(digest(local_version)), response)
def stream_experiment_data(request, **kwargs):
    """Test stand-in for `StreamExperimentData` that yields scalar data.

    Yields one response per (run, tag) pair; each has ten points with
    value `2.0 * step`. `self` is the enclosing test case (closure).
    """
    self.assertEqual(kwargs["metadata"], grpc_util.version_metadata())
    for run in ("train", "test"):
        for tag in ("accuracy", "loss"):
            response = export_service_pb2.StreamExperimentDataResponse()
            response.run_name = run
            response.tag_name = tag
            response.tag_metadata.CopyFrom(
                test_util.scalar_metadata(
                    "%s:%s" % (request.experiment_id, tag)
                )
            )
            points = response.points
            for step in range(10):
                points.steps.append(step)
                points.values.append(2.0 * step)
                points.wall_times.add(
                    seconds=1571084520 + step, nanos=862939144
                )
            yield response
def stream_experiments(request, **kwargs):
    """Test stand-in for `StreamExperiments` yielding two response batches.

    The first batch lists experiments "123" and "456" by ID only; the
    second holds experiment "789" with name, description and timestamps.
    `self` is the enclosing test case (captured from the closure).
    """
    del request  # unused
    self.assertEqual(kwargs["metadata"], grpc_util.version_metadata())

    first_batch = export_service_pb2.StreamExperimentsResponse()
    for experiment_id in ("123", "456"):
        first_batch.experiments.add(experiment_id=experiment_id)
    yield first_batch

    second_batch = export_service_pb2.StreamExperimentsResponse()
    detailed = second_batch.experiments.add()
    detailed.experiment_id = "789"
    detailed.name = "bert"
    detailed.description = "ernie"
    util.set_timestamp(detailed.create_time, 981173106)
    util.set_timestamp(detailed.update_time, 1015218367)
    yield second_batch
def get_scalars(self, runs_filter=None, tags_filter=None, pivot=None):
    """Downloads the experiment's scalar data into a pandas DataFrame.

    Args:
      runs_filter: Not supported yet; must be `None`.
      tags_filter: Not supported yet; must be `None`.
      pivot: Whether to pass the frame through `self._pivot_dataframe`.
        `None` (the default) is treated as `True`.

    Returns:
      A DataFrame with columns "run", "tag", "step", "wall_time" and
      "value" (possibly pivoted).

    Raises:
      NotImplementedError: If `runs_filter` or `tags_filter` is given.
    """
    if runs_filter is not None:
        raise NotImplementedError(
            "runs_filter support for get_scalars() is not implemented yet."
        )
    if tags_filter is not None:
        raise NotImplementedError(
            "tags_filter support for get_scalars() is not implemented yet."
        )

    request = export_service_pb2.StreamExperimentDataRequest()
    request.experiment_id = self._experiment_id
    util.set_timestamp(request.read_timestamp, time.time())
    # TODO(cais, wchargin): Use another rpc to check for staleness and avoid
    # a new StreamExperimentData rpc request if data is not stale.
    responses = self._api_client.StreamExperimentData(
        request, metadata=grpc_util.version_metadata()
    )

    columns = {"run": [], "tag": [], "step": [], "wall_time": [], "value": []}
    for response in responses:
        # TODO(cais, wchargin): Display progress bar during data loading.
        points = response.points
        count = len(points.values)
        columns["run"].extend([response.run_name] * count)
        columns["tag"].extend([response.tag_name] * count)
        columns["step"].extend(points.steps)
        columns["wall_time"].extend(
            ts.ToNanoseconds() / 1e9 for ts in points.wall_times
        )
        columns["value"].extend(points.values)

    dataframe = pandas.DataFrame(columns)
    # An unspecified `pivot` means "pivot".
    if pivot is None or pivot:
        dataframe = self._pivot_dataframe(dataframe)
    return dataframe
def stream_experiment_data(request, **kwargs):
    """Test stand-in for `StreamExperimentData` yielding blob sequences.

    The "train" run carries one current blob followed by one unfinalized
    blob; the "test" run carries a single entry with no blob (a hole).
    `self` is the enclosing test case (captured from the closure).
    """
    self.assertEqual(kwargs["metadata"], grpc_util.version_metadata())
    tag = "__default_graph__"
    blob_sequence_metadata = summary_pb2.SummaryMetadata(
        data_class=summary_pb2.DATA_CLASS_BLOB_SEQUENCE
    )
    for run in ("train", "test"):
        response = export_service_pb2.StreamExperimentDataResponse()
        response.run_name = run
        response.tag_name = tag
        display_name = "%s:%s" % (request.experiment_id, tag)
        response.tag_metadata.CopyFrom(blob_sequence_metadata)
        sequences = response.blob_sequences
        for step in range(1):
            sequences.steps.append(step)
            sequences.wall_times.add(
                seconds=1571084520 + step, nanos=862939144
            )
            entries = []
            if run == "train":
                # A finished blob sequence.
                finished = blob_pb2.Blob(
                    blob_id="%s_blob" % run,
                    state=blob_pb2.BlobState.BLOB_STATE_CURRENT,
                )
                entries.append(blob_pb2.BlobSequenceEntry(blob=finished))
                # An unfinished blob sequence.
                unfinished = blob_pb2.Blob(
                    state=blob_pb2.BlobState.BLOB_STATE_UNFINALIZED
                )
                entries.append(blob_pb2.BlobSequenceEntry(blob=unfinished))
            elif run == "test":
                # `blob` unspecified: a hole in the blob sequence.
                entries.append(blob_pb2.BlobSequenceEntry())
            blob_sequence = blob_pb2.BlobSequence()
            blob_sequence.entries.extend(entries)
            sequences.values.append(blob_sequence)
        yield response
def list_experiments(api_client, fieldmask=None, read_time=None):
    """Streams every experiment that belongs to the calling user.

    Args:
      api_client: A TensorBoardExporterService stub instance.
      fieldmask: An optional `experiment_pb2.ExperimentMask` value; when
        truthy it is copied into the request's `experiments_mask`.
      read_time: A fixed timestamp from which to export data, as float
        seconds since epoch (like `time.time()`). Optional; defaults to
        the current time.

    Yields:
      For each experiment owned by the user, an
      `experiment_pb2.Experiment` value.

    Raises:
      RuntimeError: If the server returns experiment IDs but no
        experiments, as in an old, unsupported version of the protocol.
    """
    effective_read_time = time.time() if read_time is None else read_time
    request = export_service_pb2.StreamExperimentsRequest(limit=_MAX_INT64)
    util.set_timestamp(request.read_timestamp, effective_read_time)
    if fieldmask:
        request.experiments_mask.CopyFrom(fieldmask)
    responses = api_client.StreamExperiments(
        request, metadata=grpc_util.version_metadata()
    )
    for response in responses:
        if response.experiments:
            for experiment in response.experiments:
                yield experiment
            continue
        if response.experiment_ids:
            # IDs without experiments: the legacy protocol, unsupported.
            raise RuntimeError(
                "Server sent experiment_ids without experiments: <%r>"
                % (list(response.experiment_ids),)
            )
        # No data: not technically a problem, but not expected.
        logger.warning(
            "StreamExperiments RPC returned response with no experiments: <%r>",
            response,
        )
def list_experiments(api_client, read_time=None):
    """Streams the IDs of all experiments owned by the calling user.

    Args:
      api_client: A TensorBoardExporterService stub instance.
      read_time: A fixed timestamp from which to export data, as float
        seconds since epoch (like `time.time()`). Optional; defaults to
        the current time.

    Yields:
      One string for each experiment owned by the calling user, in
      arbitrary order.
    """
    effective_read_time = time.time() if read_time is None else read_time
    request = export_service_pb2.StreamExperimentsRequest(limit=_MAX_INT64)
    util.set_timestamp(request.read_timestamp, effective_read_time)
    responses = api_client.StreamExperiments(
        request, metadata=grpc_util.version_metadata()
    )
    for batch in responses:
        for experiment_id in batch.experiment_ids:
            yield experiment_id
def _download_blob(self, blob_id, experiment_id):
    """Download the blob via rpc.

    The blob is streamed with `StreamBlobData` and written chunk by chunk
    to a new file under the experiment's blobs directory. The file is
    opened in exclusive-creation mode, so an already existing file for
    the same blob raises `FileExistsError`.

    Args:
      blob_id: Id of the blob.
      experiment_id: Id of the experiment that the blob belongs to.

    Returns:
      If the blob is downloaded successfully:
        The path of the downloaded blob file relative to the experiment
        directory.
      Else:
        `None`. In that case the partially written file is removed so
        that no truncated blob is left behind on disk.
    """
    # TODO(cais): Deduplicate with internal method perhaps.
    experiment_dir = _experiment_directory(self._outdir, experiment_id)
    request = export_service_pb2.StreamBlobDataRequest(blob_id=blob_id)
    blob_abspath = os.path.join(
        experiment_dir,
        _DIRNAME_BLOBS,
        _FILENAME_BLOBS_PREFIX + blob_id + _FILENAME_BLOBS_SUFFIX,
    )
    try:
        with open(blob_abspath, "xb") as f:
            for response in self._api.StreamBlobData(
                request, metadata=grpc_util.version_metadata()
            ):
                # TODO(cais, soergel): validate the various response fields
                f.write(response.data)
    except grpc.RpcError as rpc_error:
        logger.error(
            "Omitting blob (id: %s) due to download failure: %s",
            blob_id,
            rpc_error,
        )
        # The file has been closed by the `with` block at this point.
        # Drop the incomplete download: nothing references it, and
        # leaving it would make a later retry fail on the "x" open mode.
        try:
            os.remove(blob_abspath)
        except OSError as remove_error:
            logger.warning(
                "Could not remove partial blob file %s: %s",
                blob_abspath,
                remove_error,
            )
        return None
    return os.path.relpath(blob_abspath, experiment_dir)
def test_e2e_success_case_with_only_scalar_data(self):
    """End-to-end export of three experiments that contain only scalars.

    Drives the `export()` generator one experiment at a time and checks,
    after each step, which files exist in the output directory and which
    RPCs were issued. Finally spot-checks one scalars file, one
    blob_sequences file and one metadata file.
    """
    mock_api_client = self._create_mock_api_client()
    # NOTE(review): this `return_value` appears to be superseded by the
    # `mock.Mock(wraps=stream_experiments)` assignment further below.
    mock_api_client.StreamExperiments.return_value = iter(
        [_make_experiments_response(["789"])])

    def stream_experiments(request, **kwargs):
        # Stub RPC: two batches, IDs "123"/"456" and then a fully
        # described experiment "789".
        del request  # unused
        self.assertEqual(kwargs["metadata"], grpc_util.version_metadata())

        response = export_service_pb2.StreamExperimentsResponse()
        response.experiments.add(experiment_id="123")
        response.experiments.add(experiment_id="456")
        yield response

        response = export_service_pb2.StreamExperimentsResponse()
        experiment = response.experiments.add()
        experiment.experiment_id = "789"
        experiment.name = "bert"
        experiment.description = "ernie"
        util.set_timestamp(experiment.create_time, 981173106)
        util.set_timestamp(experiment.update_time, 1015218367)
        yield response

    def stream_experiment_data(request, **kwargs):
        # Stub RPC: one response per (run, tag) pair, ten points each with
        # value `2.0 * step`.
        self.assertEqual(kwargs["metadata"], grpc_util.version_metadata())
        for run in ("train", "test"):
            for tag in ("accuracy", "loss"):
                response = export_service_pb2.StreamExperimentDataResponse(
                )
                response.run_name = run
                response.tag_name = tag
                display_name = "%s:%s" % (request.experiment_id, tag)
                response.tag_metadata.CopyFrom(
                    test_util.scalar_metadata(display_name))
                for step in range(10):
                    response.points.steps.append(step)
                    response.points.values.append(2.0 * step)
                    response.points.wall_times.add(seconds=1571084520 + step,
                                                   nanos=862939144)
                yield response

    mock_api_client.StreamExperiments = mock.Mock(wraps=stream_experiments)
    mock_api_client.StreamExperimentData = mock.Mock(
        wraps=stream_experiment_data)

    outdir = os.path.join(self.get_temp_dir(), "outdir")
    exporter = exporter_lib.TensorBoardExporter(mock_api_client, outdir)
    start_time = 1571084846.25
    # Same instant as `start_time`, expressed in nanoseconds.
    start_time_pb = test_util.timestamp_pb(1571084846250000000)

    # Creating the generator must not issue any RPC or write any file yet.
    generator = exporter.export(read_time=start_time)
    expected_files = []
    self.assertTrue(os.path.isdir(outdir))
    self.assertCountEqual(expected_files, _outdir_files(outdir))
    mock_api_client.StreamExperiments.assert_not_called()
    mock_api_client.StreamExperimentData.assert_not_called()

    # The first iteration should request the list of experiments and
    # data for one of them.
    self.assertEqual(next(generator), "123")
    expected_files.append(os.path.join("experiment_123", "metadata.json"))
    expected_files.append(os.path.join("experiment_123", "scalars.json"))
    expected_files.append(os.path.join("experiment_123", "tensors.json"))
    # blob_sequences.json should exist and be empty.
    expected_files.append(
        os.path.join("experiment_123", "blob_sequences.json"))
    self.assertCountEqual(expected_files, _outdir_files(outdir))

    # Check that the tensors and blob_sequences data files are empty, because
    # there are no tensors or blob sequences.
    with open(os.path.join(outdir, "experiment_123",
                           "tensors.json")) as infile:
        self.assertEqual(infile.read(), "")
    with open(os.path.join(outdir, "experiment_123",
                           "blob_sequences.json")) as infile:
        self.assertEqual(infile.read(), "")

    # The experiment-listing request must carry the read timestamp, the
    # maximum int64 limit and a mask selecting all four metadata fields.
    expected_eids_request = export_service_pb2.StreamExperimentsRequest()
    expected_eids_request.read_timestamp.CopyFrom(start_time_pb)
    expected_eids_request.limit = 2**63 - 1
    expected_eids_request.experiments_mask.create_time = True
    expected_eids_request.experiments_mask.update_time = True
    expected_eids_request.experiments_mask.name = True
    expected_eids_request.experiments_mask.description = True
    mock_api_client.StreamExperiments.assert_called_once_with(
        expected_eids_request, metadata=grpc_util.version_metadata())

    expected_data_request = export_service_pb2.StreamExperimentDataRequest(
    )
    expected_data_request.experiment_id = "123"
    expected_data_request.read_timestamp.CopyFrom(start_time_pb)
    mock_api_client.StreamExperimentData.assert_called_once_with(
        expected_data_request, metadata=grpc_util.version_metadata())

    # The next iteration should just request data for the next experiment.
    mock_api_client.StreamExperiments.reset_mock()
    mock_api_client.StreamExperimentData.reset_mock()
    self.assertEqual(next(generator), "456")

    expected_files.append(os.path.join("experiment_456", "metadata.json"))
    expected_files.append(os.path.join("experiment_456", "scalars.json"))
    expected_files.append(os.path.join("experiment_456", "tensors.json"))
    # blob_sequences.json should exist and be empty.
    expected_files.append(
        os.path.join("experiment_456", "blob_sequences.json"))
    self.assertCountEqual(expected_files, _outdir_files(outdir))
    mock_api_client.StreamExperiments.assert_not_called()
    expected_data_request.experiment_id = "456"
    mock_api_client.StreamExperimentData.assert_called_once_with(
        expected_data_request, metadata=grpc_util.version_metadata())

    # Again, request data for the next experiment; this experiment ID
    # was in the second response batch in the list of IDs.
    expected_files.append(os.path.join("experiment_789", "metadata.json"))
    expected_files.append(os.path.join("experiment_789", "scalars.json"))
    expected_files.append(os.path.join("experiment_789", "tensors.json"))
    # blob_sequences.json should exist and be empty.
    expected_files.append(
        os.path.join("experiment_789", "blob_sequences.json"))
    mock_api_client.StreamExperiments.reset_mock()
    mock_api_client.StreamExperimentData.reset_mock()
    self.assertEqual(next(generator), "789")

    self.assertCountEqual(expected_files, _outdir_files(outdir))
    mock_api_client.StreamExperiments.assert_not_called()
    expected_data_request.experiment_id = "789"
    mock_api_client.StreamExperimentData.assert_called_once_with(
        expected_data_request, metadata=grpc_util.version_metadata())

    # The final continuation shouldn't need to send any RPCs.
    mock_api_client.StreamExperiments.reset_mock()
    mock_api_client.StreamExperimentData.reset_mock()
    self.assertEqual(list(generator), [])

    self.assertCountEqual(expected_files, _outdir_files(outdir))
    mock_api_client.StreamExperiments.assert_not_called()
    mock_api_client.StreamExperimentData.assert_not_called()

    # Spot-check one of the scalar data files.
    with open(os.path.join(outdir, "experiment_456",
                           "scalars.json")) as infile:
        jsons = [json.loads(line) for line in infile]
    # One JSON line per (run, tag) pair emitted by the stub.
    self.assertLen(jsons, 4)
    # Third line: run "test", tag "accuracy" (stub iteration order).
    datum = jsons[2]
    self.assertEqual(datum.pop("run"), "test")
    self.assertEqual(datum.pop("tag"), "accuracy")
    summary_metadata = summary_pb2.SummaryMetadata.FromString(
        base64.b64decode(datum.pop("summary_metadata")))
    expected_summary_metadata = test_util.scalar_metadata("456:accuracy")
    self.assertEqual(summary_metadata, expected_summary_metadata)
    points = datum.pop("points")
    expected_steps = [x for x in range(10)]
    expected_values = [2.0 * x for x in range(10)]
    expected_wall_times = [1571084520.862939144 + x for x in range(10)]
    self.assertEqual(points.pop("steps"), expected_steps)
    self.assertEqual(points.pop("values"), expected_values)
    self.assertEqual(points.pop("wall_times"), expected_wall_times)
    # Every key has been popped, so no unexpected fields remain.
    self.assertEqual(points, {})
    self.assertEqual(datum, {})

    # Check that one of the blob_sequences data files is empty, because
    # there are no blob sequences in this experiment.
    with open(os.path.join(outdir, "experiment_456",
                           "blob_sequences.json")) as infile:
        self.assertEqual(infile.read(), "")

    # Spot-check one of the metadata files.
    with open(os.path.join(outdir, "experiment_789",
                           "metadata.json")) as infile:
        metadata = json.load(infile)
    self.assertEqual(
        metadata,
        {
            "name": "bert",
            "description": "ernie",
            "create_time": "2001-02-03T04:05:06Z",
            "update_time": "2002-03-04T05:06:07Z",
        },
    )
def test_e2e_success_case_with_blob_sequence_data(self):
    """Covers exporting of complete and incomplete blob sequences.

    Also covers an rpc error during blob streaming: experiment "123"
    downloads its finished blob successfully, while for experiment "456"
    the `StreamBlobData` stub is set up with a grpc error.
    """
    mock_api_client = self._create_mock_api_client()

    def stream_experiments(request, **kwargs):
        # Stub RPC: experiments "123" and "456", one per response.
        del request  # unused
        self.assertEqual(kwargs["metadata"], grpc_util.version_metadata())

        response = export_service_pb2.StreamExperimentsResponse()
        response.experiments.add(experiment_id="123")
        yield response
        response = export_service_pb2.StreamExperimentsResponse()
        response.experiments.add(experiment_id="456")
        yield response

    def stream_experiment_data(request, **kwargs):
        # Stub RPC: one blob-sequence response per run.
        self.assertEqual(kwargs["metadata"], grpc_util.version_metadata())

        tag = "__default_graph__"
        for run in ("train", "test"):
            response = export_service_pb2.StreamExperimentDataResponse()
            response.run_name = run
            response.tag_name = tag
            # NOTE(review): `display_name` is not used below.
            display_name = "%s:%s" % (request.experiment_id, tag)
            response.tag_metadata.CopyFrom(
                summary_pb2.SummaryMetadata(
                    data_class=summary_pb2.DATA_CLASS_BLOB_SEQUENCE))
            for step in range(1):
                response.blob_sequences.steps.append(step)
                response.blob_sequences.wall_times.add(
                    seconds=1571084520 + step, nanos=862939144)
                blob_sequence = blob_pb2.BlobSequence()
                if run == "train":
                    # A finished blob sequence.
                    blob = blob_pb2.Blob(
                        blob_id="%s_blob" % run,
                        state=blob_pb2.BlobState.BLOB_STATE_CURRENT,
                    )
                    blob_sequence.entries.append(
                        blob_pb2.BlobSequenceEntry(blob=blob))
                    # An unfinished blob sequence.
                    blob = blob_pb2.Blob(
                        state=blob_pb2.BlobState.BLOB_STATE_UNFINALIZED,
                    )
                    blob_sequence.entries.append(
                        blob_pb2.BlobSequenceEntry(blob=blob))
                elif run == "test":
                    blob_sequence.entries.append(
                        # `blob` unspecified: a hole in the blob sequence.
                        blob_pb2.BlobSequenceEntry())
                response.blob_sequences.values.append(blob_sequence)
            yield response

    mock_api_client.StreamExperiments = mock.Mock(wraps=stream_experiments)
    mock_api_client.StreamExperimentData = mock.Mock(
        wraps=stream_experiment_data)
    # First `StreamBlobData` call: two chunks that concatenate to
    # b"43218765". Second call: a grpc error.
    mock_api_client.StreamBlobData.side_effect = [
        iter([
            export_service_pb2.StreamBlobDataResponse(
                data=b"4321",
                offset=0,
                final_chunk=False,
            ),
            export_service_pb2.StreamBlobDataResponse(
                data=b"8765",
                offset=4,
                final_chunk=True,
            ),
        ]),
        # Raise error from `StreamBlobData` to test the grpc-error
        # condition.
        test_util.grpc_error(grpc.StatusCode.INTERNAL, "Error for testing"),
    ]

    outdir = os.path.join(self.get_temp_dir(), "outdir")
    exporter = exporter_lib.TensorBoardExporter(mock_api_client, outdir)
    start_time = 1571084846.25
    # NOTE(review): `start_time_pb` is not used in the rest of this test.
    start_time_pb = test_util.timestamp_pb(1571084846250000000)

    # Creating the generator must not issue any RPC or write any file yet.
    generator = exporter.export(read_time=start_time)
    expected_files = []
    self.assertTrue(os.path.isdir(outdir))
    self.assertCountEqual(expected_files, _outdir_files(outdir))
    mock_api_client.StreamExperiments.assert_not_called()
    mock_api_client.StreamExperimentData.assert_not_called()

    # The first iteration should request the list of experiments and
    # data for one of them.
    self.assertEqual(next(generator), "123")
    expected_files.append(os.path.join("experiment_123", "metadata.json"))
    # scalars.json and tensors.json should exist and be empty.
    expected_files.append(os.path.join("experiment_123", "scalars.json"))
    expected_files.append(os.path.join("experiment_123", "tensors.json"))
    expected_files.append(
        os.path.join("experiment_123", "blob_sequences.json"))
    expected_files.append(
        os.path.join("experiment_123", "blobs", "blob_train_blob.bin"))
    # blobs/blob_test_blob.bin should not exist, because it contains
    # an unfinished blob.
    self.assertCountEqual(expected_files, _outdir_files(outdir))

    # Check that the scalars and tensors data files are empty, because
    # there are no scalars or tensors.
    with open(os.path.join(outdir, "experiment_123",
                           "scalars.json")) as infile:
        self.assertEqual(infile.read(), "")
    with open(os.path.join(outdir, "experiment_123",
                           "tensors.json")) as infile:
        self.assertEqual(infile.read(), "")

    # Check the blob_sequences.json file.
    with open(os.path.join(outdir, "experiment_123",
                           "blob_sequences.json")) as infile:
        jsons = [json.loads(line) for line in infile]
    # One JSON line per run emitted by the stub.
    self.assertLen(jsons, 2)

    datum = jsons[0]
    self.assertEqual(datum.pop("run"), "train")
    self.assertEqual(datum.pop("tag"), "__default_graph__")
    summary_metadata = summary_pb2.SummaryMetadata.FromString(
        base64.b64decode(datum.pop("summary_metadata")))
    expected_summary_metadata = summary_pb2.SummaryMetadata(
        data_class=summary_pb2.DATA_CLASS_BLOB_SEQUENCE)
    self.assertEqual(summary_metadata, expected_summary_metadata)
    points = datum.pop("points")
    self.assertEqual(datum, {})
    self.assertEqual(points.pop("steps"), [0])
    self.assertEqual(points.pop("wall_times"), [1571084520.862939144])
    # The 1st blob is finished; the 2nd is unfinished.
    self.assertEqual(points.pop("blob_file_paths"),
                     [["blobs/blob_train_blob.bin", None]])
    self.assertEqual(points, {})

    datum = jsons[1]
    self.assertEqual(datum.pop("run"), "test")
    self.assertEqual(datum.pop("tag"), "__default_graph__")
    summary_metadata = summary_pb2.SummaryMetadata.FromString(
        base64.b64decode(datum.pop("summary_metadata")))
    self.assertEqual(summary_metadata, expected_summary_metadata)
    points = datum.pop("points")
    self.assertEqual(datum, {})
    self.assertEqual(points.pop("steps"), [0])
    self.assertEqual(points.pop("wall_times"), [1571084520.862939144])
    # The "test" run's only entry has no blob (see the stub above), so
    # its blob file path is `None`.
    self.assertEqual(points.pop("blob_file_paths"), [[None]])
    self.assertEqual(points, {})

    # Check the BLOB files.
    with open(
            os.path.join(outdir, "experiment_123", "blobs",
                         "blob_train_blob.bin"),
            "rb",
    ) as f:
        # Both streamed chunks, in order.
        self.assertEqual(f.read(), b"43218765")

    # Check call to StreamBlobData.
    expected_blob_data_request = export_service_pb2.StreamBlobDataRequest(
        blob_id="train_blob")
    mock_api_client.StreamBlobData.assert_called_once_with(
        expected_blob_data_request, metadata=grpc_util.version_metadata())

    # Test the case where blob streaming errors out.
    self.assertEqual(next(generator), "456")
    # Check the blob_sequences.json file.
    with open(os.path.join(outdir, "experiment_456",
                           "blob_sequences.json")) as infile:
        jsons = [json.loads(line) for line in infile]
    self.assertLen(jsons, 2)

    datum = jsons[0]
    self.assertEqual(datum.pop("run"), "train")
    self.assertEqual(datum.pop("tag"), "__default_graph__")
    summary_metadata = summary_pb2.SummaryMetadata.FromString(
        base64.b64decode(datum.pop("summary_metadata")))
    self.assertEqual(summary_metadata, expected_summary_metadata)
    points = datum.pop("points")
    self.assertEqual(datum, {})
    self.assertEqual(points.pop("steps"), [0])
    self.assertEqual(points.pop("wall_times"), [1571084520.862939144])
    # The first `None` represents the blob that experienced an error
    # during downloading and hence is missing; the second is the
    # unfinished blob.
    self.assertEqual(points.pop("blob_file_paths"), [[None, None]])
    self.assertEqual(points, {})

    datum = jsons[1]
    self.assertEqual(datum.pop("run"), "test")
    self.assertEqual(datum.pop("tag"), "__default_graph__")
    summary_metadata = summary_pb2.SummaryMetadata.FromString(
        base64.b64decode(datum.pop("summary_metadata")))
    self.assertEqual(summary_metadata, expected_summary_metadata)
    points = datum.pop("points")
    self.assertEqual(datum, {})
    self.assertEqual(points.pop("steps"), [0])
    self.assertEqual(points.pop("wall_times"), [1571084520.862939144])
    # As for experiment "123", the "test" run's only entry has no blob,
    # so its blob file path is `None`.
    self.assertEqual(points.pop("blob_file_paths"), [[None]])
    self.assertEqual(points, {})
def test_e2e_success_case_with_only_tensors_data(self):
    """Exports one experiment that contains only tensor data.

    The mocked service lists a single experiment ("123") and streams two
    runs with three tags each, two steps per tag. The test checks the
    set of files written, that the scalar and blob-sequence files are
    empty, the RPC requests issued, two entries of `tensors.json`, and
    the arrays stored in the `.npz` files.
    """
    mock_api_client = self._create_mock_api_client()

    def stream_experiments(request, **kwargs):
        del request  # unused
        self.assertEqual(kwargs["metadata"], grpc_util.version_metadata())
        response = export_service_pb2.StreamExperimentsResponse()
        response.experiments.add(experiment_id="123")
        yield response

    def stream_experiment_data(request, **kwargs):
        self.assertEqual(kwargs["metadata"], grpc_util.version_metadata())
        for run in ("train_1", "train_2"):
            for tag in ("dense_1/kernel", "dense_1/bias", "text/test"):
                response = export_service_pb2.StreamExperimentDataResponse()
                response.run_name = run
                response.tag_name = tag
                display_name = "%s:%s" % (request.experiment_id, tag)
                response.tag_metadata.CopyFrom(
                    test_util.scalar_metadata(display_name))
                for step in range(2):
                    response.tensors.steps.append(step)
                    response.tensors.wall_times.add(
                        seconds=1571084520 + step,
                        nanos=862939144 if run == "train_1" else 962939144,
                    )
                    if tag != "text/test":
                        # Numeric tensor: a 3x2 array filled with `step`.
                        response.tensors.values.append(
                            tensor_util.make_tensor_proto(
                                np.ones([3, 2]) * step))
                    else:
                        # String tensor: three copies of "a" * (step + 1).
                        response.tensors.values.append(
                            tensor_util.make_tensor_proto(
                                np.full([3], "a" * (step + 1))))
                yield response

    mock_api_client.StreamExperiments = mock.Mock(wraps=stream_experiments)
    mock_api_client.StreamExperimentData = mock.Mock(
        wraps=stream_experiment_data)

    outdir = os.path.join(self.get_temp_dir(), "outdir")
    exporter = exporter_lib.TensorBoardExporter(mock_api_client, outdir)
    start_time = 1571084846.25
    start_time_pb = test_util.timestamp_pb(1571084846250000000)

    generator = exporter.export(read_time=start_time)
    expected_files = []
    self.assertTrue(os.path.isdir(outdir))
    self.assertCountEqual(expected_files, _outdir_files(outdir))
    mock_api_client.StreamExperiments.assert_not_called()
    mock_api_client.StreamExperimentData.assert_not_called()

    # The first iteration should request the list of experiments and
    # data for one of them.
    self.assertEqual(next(generator), "123")
    expected_files.append(os.path.join("experiment_123", "metadata.json"))
    # scalars.json should exist and be empty.
    expected_files.append(os.path.join("experiment_123", "scalars.json"))
    expected_files.append(os.path.join("experiment_123", "tensors.json"))
    # blob_sequences.json should exist and be empty.
    expected_files.append(
        os.path.join("experiment_123", "blob_sequences.json"))
    # One .npz file per (run, tag) pair.
    expected_files.extend(
        os.path.join("experiment_123", "tensors", npz_name)
        for npz_name in (
            "1571084520.862939.npz",
            "1571084520.862939_1.npz",
            "1571084520.862939_2.npz",
            "1571084520.962939.npz",
            "1571084520.962939_1.npz",
            "1571084520.962939_2.npz",
        )
    )
    self.assertCountEqual(expected_files, _outdir_files(outdir))

    # Check that the scalars and blob_sequences data files are empty,
    # because there are no scalars or blob sequences.
    with open(os.path.join(outdir, "experiment_123",
                           "scalars.json")) as infile:
        self.assertEqual(infile.read(), "")
    with open(os.path.join(outdir, "experiment_123",
                           "blob_sequences.json")) as infile:
        self.assertEqual(infile.read(), "")

    expected_eids_request = export_service_pb2.StreamExperimentsRequest()
    expected_eids_request.read_timestamp.CopyFrom(start_time_pb)
    expected_eids_request.limit = 2**63 - 1
    expected_eids_request.experiments_mask.create_time = True
    expected_eids_request.experiments_mask.update_time = True
    expected_eids_request.experiments_mask.name = True
    expected_eids_request.experiments_mask.description = True
    mock_api_client.StreamExperiments.assert_called_once_with(
        expected_eids_request, metadata=grpc_util.version_metadata())

    expected_data_request = export_service_pb2.StreamExperimentDataRequest()
    expected_data_request.experiment_id = "123"
    expected_data_request.read_timestamp.CopyFrom(start_time_pb)
    mock_api_client.StreamExperimentData.assert_called_once_with(
        expected_data_request, metadata=grpc_util.version_metadata())

    # The final StreamExperiments continuation shouldn't need to send any
    # RPCs.
    mock_api_client.StreamExperiments.reset_mock()
    mock_api_client.StreamExperimentData.reset_mock()
    self.assertEqual(list(generator), [])

    # Check tensor data.
    with open(os.path.join(outdir, "experiment_123",
                           "tensors.json")) as infile:
        jsons = [json.loads(line) for line in infile]
    self.assertLen(jsons, 6)

    datum = jsons[0]
    self.assertEqual(datum.pop("run"), "train_1")
    self.assertEqual(datum.pop("tag"), "dense_1/kernel")
    summary_metadata = summary_pb2.SummaryMetadata.FromString(
        base64.b64decode(datum.pop("summary_metadata")))
    expected_summary_metadata = test_util.scalar_metadata(
        "123:dense_1/kernel")
    self.assertEqual(summary_metadata, expected_summary_metadata)
    points = datum.pop("points")
    self.assertEqual(points.pop("steps"), [0, 1])
    self.assertEqual(
        points.pop("tensors_file_path"),
        os.path.join("tensors", "1571084520.862939.npz"),
    )
    self.assertEqual(datum, {})

    datum = jsons[4]
    self.assertEqual(datum.pop("run"), "train_2")
    self.assertEqual(datum.pop("tag"), "dense_1/bias")
    summary_metadata = summary_pb2.SummaryMetadata.FromString(
        base64.b64decode(datum.pop("summary_metadata")))
    expected_summary_metadata = test_util.scalar_metadata(
        "123:dense_1/bias")
    self.assertEqual(summary_metadata, expected_summary_metadata)
    points = datum.pop("points")
    self.assertEqual(points.pop("steps"), [0, 1])
    self.assertEqual(
        points.pop("tensors_file_path"),
        os.path.join("tensors", "1571084520.962939_1.npz"),
    )
    self.assertEqual(datum, {})

    def load_tensors(filename):
        """Returns every array stored in one exported .npz file."""
        npz_path = os.path.join(
            outdir, "experiment_123", "tensors", filename)
        # `np.load` on an .npz keeps the underlying file open; use it as
        # a context manager so the handle is released once the arrays
        # have been read.
        with np.load(npz_path) as archive:
            return [archive[key] for key in archive.keys()]

    # Load and check the tensor data from the saved .npz files.
    for filename in (
        "1571084520.862939.npz",
        "1571084520.862939_1.npz",
        "1571084520.962939.npz",
        "1571084520.962939_1.npz",
    ):
        tensors = load_tensors(filename)
        self.assertLen(tensors, 2)
        np.testing.assert_array_equal(tensors[0], 0 * np.ones([3, 2]))
        np.testing.assert_array_equal(tensors[1], 1 * np.ones([3, 2]))

    for filename in (
        "1571084520.862939_2.npz",
        "1571084520.962939_2.npz",
    ):
        tensors = load_tensors(filename)
        self.assertLen(tensors, 2)
        np.testing.assert_array_equal(
            tensors[0], np.array(["a", "a", "a"], "|S"))
        np.testing.assert_array_equal(
            tensors[1], np.array(["aa", "aa", "aa"], "|S"))
def test_e2e_success_case(self):
    """Exports three experiments one generator step at a time.

    The mocked `StreamExperiments` yields experiment IDs in two batches
    (["123", "456"], then ["789"]). After each `next(generator)` the test
    checks the files present in the output directory and the RPCs issued,
    then spot-checks one entry of the file written for experiment "456".
    """
    mock_api_client = self._create_mock_api_client()

    def stream_experiments(request, **kwargs):
        del request  # unused
        self.assertEqual(kwargs["metadata"], grpc_util.version_metadata())
        yield export_service_pb2.StreamExperimentsResponse(
            experiment_ids=["123", "456"])
        yield export_service_pb2.StreamExperimentsResponse(
            experiment_ids=["789"])

    def stream_experiment_data(request, **kwargs):
        self.assertEqual(kwargs["metadata"], grpc_util.version_metadata())
        for run in ("train", "test"):
            for tag in ("accuracy", "loss"):
                response = export_service_pb2.StreamExperimentDataResponse()
                response.run_name = run
                response.tag_name = tag
                display_name = "%s:%s" % (request.experiment_id, tag)
                response.tag_metadata.CopyFrom(
                    test_util.scalar_metadata(display_name))
                for step in range(10):
                    response.points.steps.append(step)
                    response.points.values.append(2.0 * step)
                    response.points.wall_times.add(
                        seconds=1571084520 + step, nanos=862939144)
                yield response

    # Both RPC methods are replaced outright by wrapping mocks, so any
    # `return_value` configured on the original attributes would never
    # be consulted.
    mock_api_client.StreamExperiments = mock.Mock(wraps=stream_experiments)
    mock_api_client.StreamExperimentData = mock.Mock(
        wraps=stream_experiment_data)

    outdir = os.path.join(self.get_temp_dir(), "outdir")
    exporter = exporter_lib.TensorBoardExporter(mock_api_client, outdir)
    start_time = 1571084846.25
    start_time_pb = test_util.timestamp_pb(1571084846250000000)

    generator = exporter.export(read_time=start_time)
    expected_files = []
    self.assertTrue(os.path.isdir(outdir))
    self.assertCountEqual(expected_files, os.listdir(outdir))
    mock_api_client.StreamExperiments.assert_not_called()
    mock_api_client.StreamExperimentData.assert_not_called()

    # The first iteration should request the list of experiments and
    # data for one of them.
    self.assertEqual(next(generator), "123")
    expected_files.append("scalars_123.json")
    self.assertCountEqual(expected_files, os.listdir(outdir))

    expected_eids_request = export_service_pb2.StreamExperimentsRequest()
    expected_eids_request.read_timestamp.CopyFrom(start_time_pb)
    expected_eids_request.limit = 2**63 - 1
    mock_api_client.StreamExperiments.assert_called_once_with(
        expected_eids_request, metadata=grpc_util.version_metadata())

    expected_data_request = export_service_pb2.StreamExperimentDataRequest()
    expected_data_request.experiment_id = "123"
    expected_data_request.read_timestamp.CopyFrom(start_time_pb)
    mock_api_client.StreamExperimentData.assert_called_once_with(
        expected_data_request, metadata=grpc_util.version_metadata())

    # Each further iteration should just request data for the next
    # experiment; "789" was in the second response batch in the list
    # of IDs.
    for experiment_id in ("456", "789"):
        mock_api_client.StreamExperiments.reset_mock()
        mock_api_client.StreamExperimentData.reset_mock()
        self.assertEqual(next(generator), experiment_id)
        expected_files.append("scalars_%s.json" % experiment_id)
        self.assertCountEqual(expected_files, os.listdir(outdir))
        mock_api_client.StreamExperiments.assert_not_called()
        expected_data_request.experiment_id = experiment_id
        mock_api_client.StreamExperimentData.assert_called_once_with(
            expected_data_request, metadata=grpc_util.version_metadata())

    # The final continuation shouldn't need to send any RPCs.
    mock_api_client.StreamExperiments.reset_mock()
    mock_api_client.StreamExperimentData.reset_mock()
    self.assertEqual(list(generator), [])
    self.assertCountEqual(expected_files, os.listdir(outdir))
    mock_api_client.StreamExperiments.assert_not_called()
    mock_api_client.StreamExperimentData.assert_not_called()

    # Spot-check one of the files.
    with open(os.path.join(outdir, "scalars_456.json")) as infile:
        jsons = [json.loads(line) for line in infile]
    self.assertLen(jsons, 4)
    datum = jsons[2]
    self.assertEqual(datum.pop("run"), "test")
    self.assertEqual(datum.pop("tag"), "accuracy")
    summary_metadata = summary_pb2.SummaryMetadata.FromString(
        base64.b64decode(datum.pop("summary_metadata")))
    expected_summary_metadata = test_util.scalar_metadata("456:accuracy")
    self.assertEqual(summary_metadata, expected_summary_metadata)
    points = datum.pop("points")
    expected_steps = list(range(10))
    expected_values = [2.0 * x for x in range(10)]
    expected_wall_times = [1571084520.862939144 + x for x in range(10)]
    self.assertEqual(points.pop("steps"), expected_steps)
    self.assertEqual(points.pop("values"), expected_values)
    self.assertEqual(points.pop("wall_times"), expected_wall_times)
    self.assertEqual(points, {})
    self.assertEqual(datum, {})
def test_roundtrip(self):
    """Checks that `extract_version` applied to `version_metadata()` equals `version.VERSION`."""
    metadata = grpc_util.version_metadata()
    self.assertEqual(grpc_util.extract_version(metadata), version.VERSION)