def test_parse_heka_message(data_dir, heka_format):
    """Compare the first parsed telemetry message against its JSON reference.

    The fixture ``test_telemetry_<heka_format>.heka`` under ``data_dir`` is
    parsed and its first message must equal the content of the sibling
    ``<fixture>.json`` file.

    :param data_dir: directory that holds the heka fixtures.
    :param heka_format: fixture variant name; when it contains ``"gzip"``
        the file is read through ``streaming_gzip_wrapper``.
    """
    filename = "{}/test_telemetry_{}.heka".format(data_dir, heka_format)
    reference_filename = filename + '.json'

    def parse_first_message():
        # Single place that opens the fixture, so the handle is always closed
        # and the regenerate path parses exactly like the checked path.
        with open(filename, "rb") as f:
            stream = streaming_gzip_wrapper(f) if "gzip" in heka_format else f
            # deep copy the parsed message so lazy-parsed json gets vivified
            # (builtin next() works on both Python 2.6+ and Python 3,
            # unlike the iterator's .next() method)
            return copy.deepcopy(next(message_parser.parse_heka_message(stream)))

    # enable this to regenerate the expected json representation of the ping
    if False:
        with open(reference_filename, 'w') as out:
            out.write(json.dumps(parse_first_message(), indent=4, sort_keys=True))

    # Read the reference through a context manager so the handle is released
    # deterministically instead of waiting for garbage collection.
    with open(reference_filename) as ref:
        reference = json.load(ref)

    assert parse_first_message() == reference
def test_telemetry(data_dir):
    """Verify the key sets of every message in the gzip and snappy fixtures.

    Each parsed message must expose exactly ``top_keys`` at the top level and
    exactly ``payload_keys`` under its ``"payload"`` entry.

    :param data_dir: directory that holds the heka fixtures.
    """
    def check_keys(records):
        # Shared per-record assertions for both fixture variants.
        for record in records:
            assert set(record.keys()) == top_keys
            assert set(record["payload"].keys()) == payload_keys

    gzip_path = "{}/test_telemetry_gzip.heka".format(data_dir)
    with open(gzip_path, "rb") as handle:
        # The gzip fixture is read through the streaming wrapper.
        check_keys(message_parser.parse_heka_message(streaming_gzip_wrapper(handle)))

    snappy_path = "{}/test_telemetry_snappy.heka".format(data_dir)
    with open(snappy_path, "rb") as handle:
        # The snappy fixture is handed to the parser as a plain file object.
        check_keys(message_parser.parse_heka_message(handle))
def test_unpack(data_dir):
    """Unpack each fixture with snappy enabled and check order and count.

    For every fixture variant the JSON payload of the i-th record must carry
    ``"seq" == i`` (counting from zero) and exactly ten records must be
    yielded.

    :param data_dir: directory that holds the heka fixtures.
    """
    # NOTE(review): a later definition in this module reuses the name
    # ``test_unpack``; if both stay in the same module this one is shadowed
    # and never collected -- confirm which definition is intended to run.
    for kind in ("plain", "snappy", "mixed", "gzip", "gzip_mixed"):
        path = "{}/test_{}.heka".format(data_dir, kind)
        with open(path, "rb") as handle:
            stream = streaming_gzip_wrapper(handle) if "gzip" in kind else handle
            total = 0
            unpacked = message_parser.unpack(stream, try_snappy=True)
            # ``total`` is the 1-based number of records seen so far, so the
            # expected sequence number of the current record is ``total - 1``.
            for total, (record, _) in enumerate(unpacked, 1):
                payload = json.loads(record.message.payload)
                assert total - 1 == payload["seq"]
            assert 10 == total
def test_unpack_nosnappy(data_dir):
    """Count the records yielded per fixture when snappy decoding is disabled.

    Unpacking is best-effort: an exception raised while iterating simply ends
    the count for that fixture, and the number of records yielded up to that
    point is compared with the expected value.

    :param data_dir: directory that holds the heka fixtures.
    """
    # Expected number of records per fixture with try_snappy=False.
    # NOTE(review): presumably the lower counts reflect snappy-compressed
    # records that cannot be decoded -- confirm against the fixtures.
    expected_counts = {"plain": 10, "snappy": 0, "mixed": 5,
                       "gzip": 10, "gzip_mixed": 5}
    for t, expected in expected_counts.items():
        count = 0
        filename = "{}/test_{}.heka".format(data_dir, t)
        with open(filename, "rb") as o:
            if "gzip" in t:
                o = streaming_gzip_wrapper(o)
            try:
                for _record, _size in message_parser.unpack(o, try_snappy=False):
                    count += 1
            except Exception:
                # Deliberately ignored: only the count reached before the
                # failure matters. ``Exception`` (not a bare ``except``) so
                # KeyboardInterrupt / SystemExit still propagate.
                pass
        assert expected == count
def test_unpack(data_dir, heka_format, try_snappy, strict, expected_count, expected_exception):
    """Unpack one fixture and check record order, count and failure mode.

    :param data_dir: directory that holds the heka fixtures.
    :param heka_format: fixture variant name; when it contains ``"gzip"``
        the file is read through ``streaming_gzip_wrapper``.
    :param try_snappy: forwarded to ``message_parser.unpack``.
    :param strict: forwarded to ``message_parser.unpack``.
    :param expected_count: number of records expected before unpacking ends.
    :param expected_exception: whether a ``DecodeError`` is expected.
    """
    path = "{}/test_{}.heka".format(data_dir, heka_format)
    seen = 0
    with open(path, "rb") as handle:
        stream = streaming_gzip_wrapper(handle) if "gzip" in heka_format else handle
        try:
            for record, _ in message_parser.unpack(stream, try_snappy=try_snappy, strict=strict):
                payload = json.loads(record.message.payload)
                # Records must arrive in sequence, starting at zero.
                assert seen == payload["seq"]
                seen += 1
        except DecodeError:
            raised = True
        else:
            raised = False
    assert seen == expected_count
    assert raised == expected_exception
def test_lazy_parsing(data_dir, monkeypatch):
    """Check that JSON fields of a heka message are parsed lazily.

    ``message_parser._parse_json`` is replaced by a wrapping mock so its
    calls can be counted: one call after loading the first message, twenty
    after deep-copying it.

    :param data_dir: directory that holds the heka fixtures.
    :param monkeypatch: pytest fixture used to swap in the counting mock.
    """
    mock_parse_json = MagicMock(name='_parse_json',
                                wraps=message_parser._parse_json)
    monkeypatch.setattr(message_parser, '_parse_json', mock_parse_json)

    # this heka message has 20 json fields, only one of which should be parsed
    # on first load
    filename = "{}/test_telemetry_gzip.heka".format(data_dir)
    with open(filename, "rb") as o:
        # builtin next() works on both Python 2.6+ and Python 3, unlike the
        # iterator's .next() method
        heka_message = next(
            message_parser.parse_heka_message(streaming_gzip_wrapper(o)))

        # should only have parsed json *once* to get the payload field (other
        # json/dictionary fields of the message should be parsed lazily)
        assert mock_parse_json.call_count == 1

        # deep copying the heka message should cause the lazily evaluated
        # fields to be evaluated
        copy.deepcopy(heka_message)
        # 19 lazily instantiated fields + original call
        assert mock_parse_json.call_count == 20
def test_unpack_strict(data_dir):
    """Check which fixtures make strict, snappy-less unpacking raise.

    Each fixture is fully iterated with ``strict=True`` and
    ``try_snappy=False``; whether any exception escaped the iteration is
    compared with the expectation for that fixture.

    :param data_dir: directory that holds the heka fixtures.
    """
    expected_exceptions = {"plain": False, "snappy": True, "mixed": True,
                           "gzip": False, "gzip_mixed": True}
    for t, should_throw in expected_exceptions.items():
        filename = "{}/test_{}.heka".format(data_dir, t)
        threw = False
        with open(filename, "rb") as o:
            if "gzip" in t:
                o = streaming_gzip_wrapper(o)
            try:
                # Only draining the iterator matters; the yielded records are
                # not inspected because nothing is asserted about them.
                for _record, _size in message_parser.unpack(
                        o, strict=True, try_snappy=False):
                    pass
            except Exception:
                # Any Exception subclass counts as a strict-mode failure.
                threw = True
        assert should_throw == threw