def _create_point(self, tag_proto, event, value): """Adds a scalar point to the given tag, if there's space. Args: tag_proto: `WriteScalarRequest.Tag` proto to which to add a point. event: Enclosing `Event` proto with the step and wall time data. value: Scalar `Summary.Value` proto with the actual scalar data. Returns: The `ScalarPoint` that was added to `tag_proto.points`. Raises: _OutOfSpaceError: If adding the point would exceed the remaining request budget. """ point = tag_proto.points.add() point.step = event.step # TODO(@nfelt): skip tensor roundtrip for Value with simple_value set point.value = tensor_util.make_ndarray(value.tensor).item() util.set_timestamp(point.wall_time, event.wall_time) submessage_cost = point.ByteSize() cost = submessage_cost + _varint_cost(submessage_cost) + 1 # proto key if cost > self._byte_budget: tag_proto.points.pop() raise _OutOfSpaceError() self._byte_budget -= cost return point
def _get_or_create_blob_sequence(self):
    request = write_service_pb2.GetOrCreateBlobSequenceRequest(
        experiment_id=self._experiment_id,
        run=self._run_name,
        tag=self._value.tag,
        step=self._event.step,
        final_sequence_length=len(self._blobs),
        metadata=self._metadata,
    )
    util.set_timestamp(request.wall_time, self._event.wall_time)
    with _request_logger(request):
        try:
            # TODO(@nfelt): execute this RPC asynchronously.
            response = grpc_util.call_with_retries(
                self._api.GetOrCreateBlobSequence, request
            )
            blob_sequence_id = response.blob_sequence_id
        except grpc.RpcError as e:
            if e.code() == grpc.StatusCode.NOT_FOUND:
                raise ExperimentNotFoundError()
            logger.error("Upload call failed with error %s", e)
            # TODO(soergel): clean up
            raise
    return blob_sequence_id
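# `_request_logger` above is assumed to be a context manager that records
# request size and upload latency around the RPC. A hypothetical sketch
# (the exact fields logged are an assumption; `logger` is the module-level
# logger used elsewhere in these snippets):
import contextlib
import time


@contextlib.contextmanager
def _request_logger(request):
    upload_start_time = time.time()
    logger.info("Trying request of %d bytes", request.ByteSize())
    yield
    logger.info(
        "Upload of %d bytes took %.3f seconds",
        request.ByteSize(),
        time.time() - upload_start_time,
    )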
def _create_point(self, tag_proto, event, value): """Adds a tensor point to the given tag, if there's space. Args: tag_proto: `WriteTensorRequest.Tag` proto to which to add a point. event: Enclosing `Event` proto with the step and wall time data. value: Tensor `Summary.Value` proto with the actual tensor data. Returns: The `TensorPoint` that was added to `tag_proto.points`. Raises: _OutOfSpaceError: If adding the point would exceed the remaining request budget. """ point = tag_proto.points.add() point.step = event.step point.value.CopyFrom(value.tensor) util.set_timestamp(point.wall_time, event.wall_time) try: self._byte_budget_manager.add_point(point) except _OutOfSpaceError: tag_proto.points.pop() raise return point
def _request_scalar_data(self, experiment_id, read_time):
    """Yields JSON-serializable blocks of scalar data."""
    request = export_service_pb2.StreamExperimentDataRequest()
    request.experiment_id = experiment_id
    util.set_timestamp(request.read_timestamp, read_time)
    # No special error handling as we don't expect any errors from these
    # calls: all experiments should exist (read consistency timestamp)
    # and be owned by the calling user (only queried for own experiment
    # IDs). Any non-transient errors would be internal, and we have no
    # way to efficiently resume from transient errors because the server
    # does not support pagination.
    stream = self._api.StreamExperimentData(
        request, metadata=grpc_util.version_metadata()
    )
    for response in stream:
        metadata = base64.b64encode(
            response.tag_metadata.SerializeToString()
        ).decode("ascii")
        wall_times = [
            t.ToNanoseconds() / 1e9 for t in response.points.wall_times
        ]
        yield {
            u"run": response.run_name,
            u"tag": response.tag_name,
            u"summary_metadata": metadata,
            u"points": {
                u"steps": list(response.points.steps),
                u"wall_times": wall_times,
                u"values": list(response.points.values),
            },
        }
def testJsonFormatterWithEmptyNameAndDescription(self):
    experiment = experiment_pb2.Experiment(
        experiment_id="deadbeef",
        # NOTE(cais): `name` and `description` are missing here.
        num_runs=2,
        num_tags=4,
        num_scalars=60,
        total_blob_bytes=1234,
    )
    util.set_timestamp(experiment.create_time, 981173106)
    util.set_timestamp(experiment.update_time, 1015218367)
    experiment_url = "http://tensorboard.dev/deadbeef"
    formatter = formatters.JsonFormatter()
    output = self._format(formatter, experiment, experiment_url)
    expected_lines = [
        "{",
        ' "url": "http://tensorboard.dev/deadbeef",',
        ' "name": "",',
        ' "description": "",',
        ' "id": "deadbeef",',
        ' "created": "2001-02-03T04:05:06Z",',
        ' "updated": "2002-03-04T05:06:07Z",',
        ' "runs": 2,',
        ' "tags": 4,',
        ' "scalars": 60,',
        ' "binary_object_bytes": 1234',
        "}",
    ]
    self.assertEqual(output.split("\n"), expected_lines)
def list_experiments(api_client, fieldmask=None, read_time=None):
    """Yields all of the calling user's experiments.

    Args:
      api_client: A TensorBoardExporterService stub instance.
      fieldmask: An optional `experiment_pb2.ExperimentMask` value.
      read_time: A fixed timestamp from which to export data, as float
        seconds since epoch (like `time.time()`). Optional; defaults to
        the current time.

    Yields:
      For each experiment owned by the user, an
      `experiment_pb2.Experiment` value, or a simple string experiment ID
      for older servers.
    """
    if read_time is None:
        read_time = time.time()
    request = export_service_pb2.StreamExperimentsRequest(limit=_MAX_INT64)
    util.set_timestamp(request.read_timestamp, read_time)
    if fieldmask:
        request.experiments_mask.CopyFrom(fieldmask)
    stream = api_client.StreamExperiments(
        request, metadata=grpc_util.version_metadata()
    )
    for response in stream:
        if response.experiments:
            for experiment in response.experiments:
                yield experiment
        else:
            # Old servers.
            for experiment_id in response.experiment_ids:
                yield experiment_id
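# Hypothetical usage of `list_experiments`: newer servers yield
# `Experiment` protos while older servers yield bare string IDs, so a
# caller must be prepared for both.
for experiment in list_experiments(api_client):
    if isinstance(experiment, str):
        print(experiment)  # old server: bare experiment ID
    else:
        print(experiment.experiment_id, experiment.name)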
def testReadableFormatterWithEmptyNameAndDescription(self):
    experiment = experiment_pb2.Experiment(
        experiment_id="deadbeef",
        # NOTE(cais): `name` and `description` are missing here.
        num_runs=2,
        num_tags=4,
        num_scalars=60,
        total_blob_bytes=1234,
    )
    util.set_timestamp(experiment.create_time, 981173106)
    util.set_timestamp(experiment.update_time, 1015218367)
    experiment_url = "http://tensorboard.dev/deadbeef"
    formatter = formatters.ReadableFormatter()
    output = self._format(formatter, experiment, experiment_url)
    expected_lines = [
        "http://tensorboard.dev/deadbeef",
        "\tName [No Name]",
        "\tDescription [No Description]",
        "\tId deadbeef",
        "\tCreated 2001-02-03 04:05:06",
        "\tUpdated 2002-03-04 05:06:07",
        "\tRuns 2",
        "\tTags 4",
        "\tScalars 60",
        "\tBinary object bytes 1234",
    ]
    self.assertEqual(output.split("\n"), expected_lines)
def _create_point(self, tag_proto, event, value): """Adds a tensor point to the given tag, if there's space. Args: tag_proto: `WriteTensorRequest.Tag` proto to which to add a point. event: Enclosing `Event` proto with the step and wall time data. value: Tensor `Summary.Value` proto with the actual tensor data. Raises: _OutOfSpaceError: If adding the point would exceed the remaining request budget. """ point = tag_proto.points.add() point.step = event.step point.value.CopyFrom(value.tensor) util.set_timestamp(point.wall_time, event.wall_time) if point.value.ByteSize() > self._max_tensor_point_size: logger.warning( "Tensor too large; skipping. " "Size %d exceeds limit of %d bytes.", point.value.ByteSize(), self._max_tensor_point_size, ) tag_proto.points.pop() return try: self._byte_budget_manager.add_point(point) except _OutOfSpaceError: tag_proto.points.pop() raise
def testReadableFormatterWithNonUtcTimezone(self):
    experiment = experiment_pb2.Experiment(
        experiment_id="deadbeef",
        name="A name for the experiment",
        description="A description for the experiment",
        num_runs=2,
        num_tags=4,
        num_scalars=60,
        total_blob_bytes=1234,
    )
    util.set_timestamp(experiment.create_time, 981173106)
    util.set_timestamp(experiment.update_time, 1015218367)
    experiment_url = "http://tensorboard.dev/deadbeef"
    formatter = formatters.ReadableFormatter()
    output = self._format(
        formatter,
        experiment,
        experiment_url,
        timezone="America/Los_Angeles",
    )
    expected_lines = [
        "http://tensorboard.dev/deadbeef",
        "\tName A name for the experiment",
        "\tDescription A description for the experiment",
        "\tId deadbeef",
        "\tCreated 2001-02-02 20:05:06",
        "\tUpdated 2002-03-03 21:06:07",
        "\tRuns 2",
        "\tTags 4",
        "\tScalars 60",
        "\tBinary object bytes 1234",
    ]
    self.assertEqual(output.split("\n"), expected_lines)
def _request_json_data(self, experiment_id, read_time):
    """Given an experiment id, generates JSON data and destination file name.

    The JSON data describes the run, tag, and metadata, in addition to:

    - Actual data in the case of scalars
    - Pointers to binary files in the case of blob sequences.

    For the case of blob sequences, this method has the side effect of
    downloading the contents of the blobs and writing them to files in
    a subdirectory of the experiment directory.

    Args:
      experiment_id: The id of the experiment to request data for.
      read_time: A fixed timestamp from which to export data, as float
        seconds since epoch (like `time.time()`).

    Yields:
      (JSON-serializable data, destination file name) tuples.
    """
    request = export_service_pb2.StreamExperimentDataRequest()
    request.experiment_id = experiment_id
    util.set_timestamp(request.read_timestamp, read_time)
    # No special error handling as we don't expect any errors from these
    # calls: all experiments should exist (read consistency timestamp)
    # and be owned by the calling user (only queried for own experiment
    # IDs). Any non-transient errors would be internal, and we have no
    # way to efficiently resume from transient errors because the server
    # does not support pagination.
    stream = self._api.StreamExperimentData(
        request, metadata=grpc_util.version_metadata()
    )
    for response in stream:
        metadata = base64.b64encode(
            response.tag_metadata.SerializeToString()
        ).decode("ascii")
        json_data = {
            u"run": response.run_name,
            u"tag": response.tag_name,
            u"summary_metadata": metadata,
        }
        filename = None
        if response.HasField("points"):
            json_data[u"points"] = self._process_scalar_points(
                response.points
            )
            filename = _FILENAME_SCALARS
        elif response.HasField("tensors"):
            json_data[u"points"] = self._process_tensor_points(
                response.tensors, experiment_id
            )
            filename = _FILENAME_TENSORS
        elif response.HasField("blob_sequences"):
            json_data[u"points"] = self._process_blob_sequence_points(
                response.blob_sequences, experiment_id
            )
            filename = _FILENAME_BLOB_SEQUENCES
        if filename:
            yield json_data, filename
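# Hypothetical consumption of the generator above, appending each block
# as one JSON line to the per-type file in the experiment directory
# (`exporter`, `experiment_dir`, and the JSON-lines framing are
# assumptions for illustration):
import json
import os

for block, filename in exporter._request_json_data(experiment_id, read_time):
    with open(os.path.join(experiment_dir, filename), "a") as outfile:
        json.dump(block, outfile, sort_keys=True)
        outfile.write("\n")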
def _request_experiment_ids(self, read_time):
    """Yields all of the calling user's experiment IDs, as strings."""
    request = export_service_pb2.StreamExperimentsRequest(limit=_MAX_INT64)
    util.set_timestamp(request.read_timestamp, read_time)
    stream = self._api.StreamExperiments(request)
    for response in stream:
        for experiment_id in response.experiment_ids:
            yield experiment_id
def test_set_timestamp(self):
    pb = timestamp_pb2.Timestamp()
    t = 1234567890.007812500
    # Note that just multiplying by 1e9 would lose precision:
    self.assertEqual(int(t * 1e9) % int(1e9), 7812608)
    util.set_timestamp(pb, t)
    self.assertEqual(pb.seconds, 1234567890)
    self.assertEqual(pb.nanos, 7812500)
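# A `set_timestamp` consistent with the test above must split out the
# whole seconds first and derive nanoseconds from the fractional part
# alone, rather than multiplying the full float by 1e9 (which the test
# shows loses precision). A minimal sketch, assuming non-negative times
# and ignoring rounding exactly at the nanos == 10**9 boundary:
def set_timestamp(pb, seconds_since_epoch):
    pb.seconds = int(seconds_since_epoch)
    pb.nanos = int(round((seconds_since_epoch % 1) * 10**9))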
def get_scalars(
    self,
    runs_filter=None,
    tags_filter=None,
    pivot=False,
    include_wall_time=False,
):
    # NOTE(#3650): Import pandas early in this method, so if the
    # Python environment does not have pandas installed, an error can be
    # raised early, before any rpc call is made.
    pandas = import_pandas()
    if runs_filter is not None:
        raise NotImplementedError(
            "runs_filter support for get_scalars() is not implemented yet."
        )
    if tags_filter is not None:
        raise NotImplementedError(
            "tags_filter support for get_scalars() is not implemented yet."
        )
    request = export_service_pb2.StreamExperimentDataRequest()
    request.experiment_id = self._experiment_id
    read_time = time.time()
    util.set_timestamp(request.read_timestamp, read_time)
    # TODO(cais, wchargin): Use another rpc to check for staleness and
    # avoid a new StreamExperimentData rpc request if data is not stale.
    stream = self._api_client.StreamExperimentData(
        request, metadata=grpc_util.version_metadata()
    )
    runs = []
    tags = []
    steps = []
    wall_times = []
    values = []
    for response in stream:
        # TODO(cais, wchargin): Display progress bar during data loading.
        num_values = len(response.points.values)
        runs.extend([response.run_name] * num_values)
        tags.extend([response.tag_name] * num_values)
        steps.extend(list(response.points.steps))
        wall_times.extend(
            [t.ToNanoseconds() / 1e9 for t in response.points.wall_times]
        )
        values.extend(list(response.points.values))
    data = {
        "run": runs,
        "tag": tags,
        "step": steps,
        "value": values,
    }
    if include_wall_time:
        data["wall_time"] = wall_times
    dataframe = pandas.DataFrame(data)
    if pivot:
        dataframe = self._pivot_dataframe(dataframe)
    return dataframe
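# Hypothetical usage, assuming `experiment` is an instance of the class
# defining `get_scalars` above: fetch all scalars as a long-form
# DataFrame that includes wall times.
df = experiment.get_scalars(include_wall_time=True)
print(df.columns.tolist())  # ["run", "tag", "step", "value", "wall_time"]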
def _run(self, t=None, tz=None):
    timestamp_pb = timestamp_pb2.Timestamp()
    util.set_timestamp(timestamp_pb, t)
    try:
        with mock.patch.dict(os.environ, {"TZ": tz}):
            time.tzset()
            return util.format_time_absolute(timestamp_pb)
    finally:
        time.tzset()
def _run(self, t=None, now=None):
    timestamp_pb = timestamp_pb2.Timestamp()
    util.set_timestamp(timestamp_pb, t)
    try:
        with mock.patch.dict(os.environ, {"TZ": "UTC"}):
            time.tzset()
            now = datetime.datetime.fromtimestamp(now)
            return util.format_time(timestamp_pb, now=now)
    finally:
        time.tzset()
def stream_experiments(request, **kwargs):
    del request  # unused
    self.assertEqual(kwargs["metadata"], grpc_util.version_metadata())

    response = export_service_pb2.StreamExperimentsResponse()
    response.experiments.add(experiment_id="123")
    response.experiments.add(experiment_id="456")
    yield response

    response = export_service_pb2.StreamExperimentsResponse()
    experiment = response.experiments.add()
    experiment.experiment_id = "789"
    experiment.name = "bert"
    experiment.description = "ernie"
    util.set_timestamp(experiment.create_time, 981173106)
    util.set_timestamp(experiment.update_time, 1015218367)
    yield response
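# Hypothetical wiring of this fake into a test, assuming
# `mock_api_client` is a mock TensorBoardExporterService stub: calling
# the mocked RPC then returns the generator defined above.
mock_api_client.StreamExperiments = mock.Mock(side_effect=stream_experiments)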
def get_scalars(self, runs_filter=None, tags_filter=None, pivot=None):
    if runs_filter is not None:
        raise NotImplementedError(
            "runs_filter support for get_scalars() is not implemented yet."
        )
    if tags_filter is not None:
        raise NotImplementedError(
            "tags_filter support for get_scalars() is not implemented yet."
        )
    pivot = True if pivot is None else pivot
    request = export_service_pb2.StreamExperimentDataRequest()
    request.experiment_id = self._experiment_id
    read_time = time.time()
    util.set_timestamp(request.read_timestamp, read_time)
    # TODO(cais, wchargin): Use another rpc to check for staleness and
    # avoid a new StreamExperimentData rpc request if data is not stale.
    stream = self._api_client.StreamExperimentData(
        request, metadata=grpc_util.version_metadata()
    )
    runs = []
    tags = []
    steps = []
    wall_times = []
    values = []
    for response in stream:
        # TODO(cais, wchargin): Display progress bar during data loading.
        num_values = len(response.points.values)
        runs.extend([response.run_name] * num_values)
        tags.extend([response.tag_name] * num_values)
        steps.extend(list(response.points.steps))
        wall_times.extend(
            [t.ToNanoseconds() / 1e9 for t in response.points.wall_times]
        )
        values.extend(list(response.points.values))
    dataframe = pandas.DataFrame(
        {
            "run": runs,
            "tag": tags,
            "step": steps,
            "wall_time": wall_times,
            "value": values,
        }
    )
    if pivot:
        dataframe = self._pivot_dataframe(dataframe)
    return dataframe
def _create_point(self, tag_proto, event, value, run_name):
    """Adds a tensor point to the given tag, if there's space.

    Args:
      tag_proto: `WriteTensorRequest.Tag` proto to which to add a point.
      event: Enclosing `Event` proto with the step and wall time data.
      value: Tensor `Summary.Value` proto with the actual tensor data.
      run_name: Name of the run, only used for error reporting.

    Raises:
      _OutOfSpaceError: If adding the point would exceed the remaining
        request budget.
    """
    point = tag_proto.points.add()
    point.step = event.step
    point.value.CopyFrom(value.tensor)
    util.set_timestamp(point.wall_time, event.wall_time)
    self._num_values += 1
    self._tensor_bytes += point.value.ByteSize()
    if point.value.ByteSize() > self._max_tensor_point_size:
        logger.warning(
            "Tensor (run:%s, tag:%s, step: %d) too large; skipping. "
            "Size %d exceeds limit of %d bytes.",
            run_name,
            tag_proto.name,
            event.step,
            point.value.ByteSize(),
            self._max_tensor_point_size,
        )
        tag_proto.points.pop()
        self._num_values_skipped += 1
        self._tensor_bytes_skipped += point.value.ByteSize()
        return

    self._validate_tensor_value(
        value.tensor, value.tag, event.step, event.wall_time
    )

    try:
        self._byte_budget_manager.add_point(point)
    except _OutOfSpaceError:
        tag_proto.points.pop()
        raise
def list_experiments(api_client, fieldmask=None, read_time=None):
    """Yields all of the calling user's experiments.

    Args:
      api_client: A TensorBoardExporterService stub instance.
      fieldmask: An optional `experiment_pb2.ExperimentMask` value.
      read_time: A fixed timestamp from which to export data, as float
        seconds since epoch (like `time.time()`). Optional; defaults to
        the current time.

    Yields:
      For each experiment owned by the user, an
      `experiment_pb2.Experiment` value.

    Raises:
      RuntimeError: If the server returns experiment IDs but no
        experiments, as in an old, unsupported version of the protocol.
    """
    if read_time is None:
        read_time = time.time()
    request = export_service_pb2.StreamExperimentsRequest(limit=_MAX_INT64)
    util.set_timestamp(request.read_timestamp, read_time)
    if fieldmask:
        request.experiments_mask.CopyFrom(fieldmask)
    stream = api_client.StreamExperiments(
        request, metadata=grpc_util.version_metadata()
    )
    for response in stream:
        if response.experiments:
            for experiment in response.experiments:
                yield experiment
        elif response.experiment_ids:
            raise RuntimeError(
                "Server sent experiment_ids without experiments: <%r>"
                % (list(response.experiment_ids),)
            )
        else:
            # No data: not technically a problem, but not expected.
            logger.warning(
                "StreamExperiments RPC returned response with no "
                "experiments: <%r>",
                response,
            )
def _create_point(self, tag_proto, event, value): """Adds a scalar point to the given tag, if there's space. Args: tag_proto: `WriteScalarRequest.Tag` proto to which to add a point. event: Enclosing `Event` proto with the step and wall time data. value: Scalar `Summary.Value` proto with the actual scalar data. Raises: _OutOfSpaceError: If adding the point would exceed the remaining request budget. """ point = tag_proto.points.add() point.step = event.step # TODO(@nfelt): skip tensor roundtrip for Value with simple_value set point.value = tensor_util.make_ndarray(value.tensor).item() util.set_timestamp(point.wall_time, event.wall_time) try: self._byte_budget_manager.add_point(point) except _OutOfSpaceError: tag_proto.points.pop() raise
def list_experiments(api_client, read_time=None):
    """Yields all of the calling user's experiment IDs.

    Args:
      api_client: A TensorBoardExporterService stub instance.
      read_time: A fixed timestamp from which to export data, as float
        seconds since epoch (like `time.time()`). Optional; defaults to
        the current time.

    Yields:
      One string for each experiment owned by the calling user, in
      arbitrary order.
    """
    if read_time is None:
        read_time = time.time()
    request = export_service_pb2.StreamExperimentsRequest(limit=_MAX_INT64)
    util.set_timestamp(request.read_timestamp, read_time)
    stream = api_client.StreamExperiments(
        request, metadata=grpc_util.version_metadata()
    )
    for response in stream:
        for experiment_id in response.experiment_ids:
            yield experiment_id