def to_properties(self):
    """
    Build the metadata record describing this dataset profile.

    The session and dataset timestamps are converted with ``to_utc_ms``
    before being stored, and an empty ``metadata`` collection is passed
    along as ``None``.

    Returns
    -------
    properties : DatasetProperties
        The metadata as a protobuf object.
    """
    profile_tags = self.tags
    profile_metadata = self.metadata
    # An empty metadata collection is handed over as None instead.
    if len(profile_metadata) == 0:
        profile_metadata = None

    return DatasetProperties(
        schema_major_version=1,
        schema_minor_version=1,
        session_id=self.session_id,
        session_timestamp=to_utc_ms(self.session_timestamp),
        data_timestamp=to_utc_ms(self.dataset_timestamp),
        tags=profile_tags,
        metadata=profile_metadata,
    )
def test_protobuf_round_trip():
    """Convert a profile to protobuf and back, then compare the two."""
    tracked_columns = ("col1", "col2")
    tags = {"k1": "rock", "k2": "scissors", "k3": "paper"}
    original = DatasetProfile(
        name="test",
        dataset_timestamp=datetime.datetime.utcnow(),
        tags=tags,
    )
    for column in tracked_columns:
        original.track(column, "value")

    msg = original.to_protobuf()
    roundtrip = DatasetProfile.from_protobuf(msg)

    # Re-serializing the parsed profile must reproduce the same message.
    assert roundtrip.to_protobuf() == msg
    assert roundtrip.name == "test"
    assert roundtrip.session_id == original.session_id

    # Timestamps are compared through their to_utc_ms conversions.
    roundtrip_session_ms = to_utc_ms(roundtrip.session_timestamp)
    assert roundtrip_session_ms == to_utc_ms(original.session_timestamp)

    assert set(roundtrip.columns.keys()) == {"col1", "col2"}
    for column in tracked_columns:
        assert roundtrip.columns[column].counters.count == 1

    # The round-tripped tags are expected to carry an extra "Name" entry
    # (presumably derived from name="test" - confirm in DatasetProfile).
    # Converting to sets means only the tag keys are compared here.
    assert set(roundtrip.tags) == set(tags) | {"Name"}
    assert roundtrip.metadata == original.metadata
def test_write_delimited_multiple():
    """Concatenate five delimited copies of a profile and parse them back."""
    copies = 5
    original = DatasetProfile(
        name="test",
        session_id="test.session.id",
        session_timestamp=datetime.datetime.utcnow(),
        tags={"key": "value"},
        metadata={"key": "value"},
    )
    original.track("col1", "value")
    output_bytes = original.serialize_delimited()

    # Start from one serialized entry and append the remaining copies.
    multiple_entries = output_bytes
    for _ in range(copies - 1):
        multiple_entries += output_bytes

    entries = DatasetProfile.parse_delimited(multiple_entries)
    assert len(entries) == copies

    # Timestamps are compared at millisecond granularity via to_utc_ms
    # rather than as raw datetime objects.
    expected_session_ms = time.to_utc_ms(original.session_timestamp)
    for entry in entries:
        assert entry.session_id == original.session_id
        assert time.to_utc_ms(entry.session_timestamp) == expected_session_ms
        assert entry.tags == original.tags
        assert entry.metadata == original.metadata
def test_write_delimited_single():
    """Serialize one delimited profile and parse that single entry back."""
    original = DatasetProfile(
        name="test",
        session_id="test.session.id",
        session_timestamp=datetime.datetime.utcnow(),
        tags={"key": "value"},
        metadata={"key": "value"},
    )
    original.track("col1", "value")

    output_bytes = original.serialize_delimited()
    # The first element of the returned pair is not needed by this test.
    _, roundtrip = DatasetProfile.parse_delimited_single(output_bytes)

    assert roundtrip.session_id == original.session_id
    # Timestamps are compared at millisecond granularity via to_utc_ms
    # rather than as raw datetime objects.
    roundtrip_session_ms = time.to_utc_ms(roundtrip.session_timestamp)
    assert roundtrip_session_ms == time.to_utc_ms(original.session_timestamp)
    assert roundtrip.tags == original.tags
    assert roundtrip.metadata == original.metadata
def session_timestamp_ms(self):
    """
    Session timestamp expressed in epoch milliseconds.

    The stored ``session_timestamp`` is converted with ``time.to_utc_ms``.
    """
    session_ts = self.session_timestamp
    return time.to_utc_ms(session_ts)