def test_non_ascii_data_non_utf_locale_coding_orjson(self):
    util.set_json_lib("orjson")

    original_data = "čććžšđ"

    # Default (UTF-8) locale
    result = util.json_encode(original_data)
    self.assertEqual(result, '"%s"' % (original_data))

    loaded = util.json_decode(result)
    self.assertEqual(loaded, original_data)

    # Non UTF-8 locale
    os.environ["LC_ALL"] = "invalid"

    result = util.json_encode(original_data)
    self.assertEqual(result, '"%s"' % (original_data))

    loaded = util.json_decode(result)
    self.assertEqual(loaded, original_data)

    # Invalid UTF-8, should fall back to the standard json implementation
    original_data = "\ud800"

    result = util.json_encode(original_data)
    self.assertEqual(result, '"\\ud800"')

    loaded = util.json_decode('"\ud800"')
    self.assertEqual(loaded, original_data)
def __runtest(library):
    self._setlib(library)

    if library == FALLBACK or not isinstance(obj, (JsonArray, JsonObject)):
        text2 = util.json_encode(obj)
        self.assertEquals(text, text2)

        obj2 = util.json_decode(text2)
        text3 = util.json_encode(obj2)
        self.assertEquals(text, text3)
    else:
        self.assertRaises(TypeError, lambda: util.json_encode(obj))
def __runtest(library):
    original_lib = util.get_json_lib()
    self._setlib(library)

    try:
        text2 = util.json_encode(obj)
        self.assertEquals(text, text2)

        obj2 = util.json_decode(text2)
        text3 = util.json_encode(obj2)
        self.assertEquals(text, text3)
    finally:
        util.set_json_lib(original_lib)
def create_configuration(extra_toplevel_config=None):
    """Creates a blank configuration file with default values. Optionally overwrites top-level
    key/values. Sets api_key to 'fake' unless defined in extra_toplevel_config.

    @param extra_toplevel_config: Dict of top-level key/value objects to overwrite.
    @return: The configuration object
    @rtype: Configuration
    """
    config_dir = tempfile.mkdtemp()
    config_file = os.path.join(config_dir, "agentConfig.json")
    config_fragments_dir = os.path.join(config_dir, "configs.d")
    os.makedirs(config_fragments_dir)

    toplevel_config = {"api_key": "fake"}
    if extra_toplevel_config:
        toplevel_config.update(extra_toplevel_config)

    fp = open(config_file, "w")
    fp.write(scalyr_util.json_encode(toplevel_config))
    fp.close()

    default_paths = DefaultPaths(
        "/var/log/scalyr-agent-2",
        "/etc/scalyr-agent-2/agent.json",
        "/var/lib/scalyr-agent-2",
    )

    config = Configuration(config_file, default_paths, None)
    config.parse()

    # We need to delete the config dir when done.
    atexit.register(shutil.rmtree, config_dir)

    return config
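# A minimal usage sketch for the helper above. The "debug_level" override key is only a
# hypothetical example of a top-level option, not taken from the original tests.
default_config = create_configuration()
custom_config = create_configuration(extra_toplevel_config={"debug_level": 5})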
def _write_bad_checkpoint_file(path):
    """
    Write bad checkpoint data to the file located at "path" (the content is a JSON-encoded
    string, not a valid checkpoint object).
    """
    fp = open(path, "w")
    fp.write(scalyr_util.json_encode("}data{,:,,{}"))
    fp.close()
def _format_key_value(self, format, key, value, emit_raw_details, detect_escaped_strings):
    if emit_raw_details or (
        detect_escaped_strings and value.startswith('"') and value.endswith('"')
    ):
        return format % (key, value)
    else:
        return format % (key, scalyr_util.json_encode(value))
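# Behavior sketch for _format_key_value above, with the stdlib json module standing in
# for scalyr_util.json_encode (illustrative only).
import json

fmt = "%s=%s"
# A value that already looks like a quoted/escaped string is emitted verbatim when
# detect_escaped_strings is enabled:
assert fmt % ("details", '"already quoted"') == 'details="already quoted"'
# Any other value goes through JSON encoding, so quotes and special characters are escaped:
assert fmt % ("details", json.dumps('say "hi"')) == 'details="say \\"hi\\""'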
def test_str_conversion(self):
    self.assertEquals(self.get(1, convert_to=six.text_type), "1")
    self.assertEquals(self.get("ah", convert_to=six.text_type), "ah")
    self.assertEquals(self.get(False, convert_to=six.text_type), "False")
    self.assertEquals(self.get(1.3, convert_to=six.text_type), "1.3")
    self.assertEquals(self.get(1, convert_to=six.text_type), "1")

    test_array = ["a", "b", "c"]

    # str -> ArrayOfStrings (must support different variations)
    arr = ArrayOfStrings(test_array)
    self.assertEquals(self.get("a,b,c", convert_to=ArrayOfStrings), arr)
    self.assertEquals(self.get("a,b, c", convert_to=ArrayOfStrings), arr)
    self.assertEquals(self.get('"a", "b", "c"', convert_to=ArrayOfStrings), arr)
    self.assertEquals(self.get("'a', 'b', 'c'", convert_to=ArrayOfStrings), arr)
    self.assertEquals(self.get("[a, b, c]", convert_to=ArrayOfStrings), arr)
    self.assertEquals(self.get("['a', \"b\", c]", convert_to=ArrayOfStrings), arr)

    # str -> JsonArray
    self.assertEquals(
        self.get(scalyr_util.json_encode(test_array), convert_to=JsonArray),
        JsonArray(*test_array),
    )
    self.assertRaises(
        BadMonitorConfiguration,
        # single quotes are invalid JSON
        lambda: self.assertEquals(
            self.get(six.text_type(test_array), convert_to=JsonArray),
            JsonArray(*test_array),
        ),
    )

    # str -> JsonObject
    test_obj = {"a": 1, "b": "two", "c": [1, 2, 3]}
    self.assertEquals(
        self.get(scalyr_util.json_encode(test_obj), convert_to=JsonObject),
        scalyr_util.json_scalyr_config_decode(scalyr_util.json_encode(test_obj)),
    )
def parse_scalyr_request(payload):
    """Parses a payload encoded with the Scalyr-specific JSON optimizations. The only place
    these optimizations are used is when creating `AddEvent` requests.

    NOTE: This method is very fragile and only does enough conversion to support the tests.
    It can produce erroneous results if patterns like "`s" or colons appear inside string
    content. It is also not optimized for performance.

    :param payload: The request payload
    :type payload: bytes

    :return: The parsed request body
    :rtype: dict
    """
    # Our general strategy is to rewrite the payload to be standard JSON and then use the
    # standard JSON libraries to parse it. There are two main optimizations we need to undo
    # here: length-prefixed strings and not using quotes around key names in JSON objects.

    # First, look for the length-prefixed strings. These are marked by "`sXXXX" where XXXX is
    # a four byte integer holding the number of bytes in the string. This precedes the string.
    # So we find all of those and replace them with quotes. We also have to escape the string.
    # NOTE: It is very important all of our regexes work against byte strings because our
    # input is in bytes.
    length_prefix_re = re.compile(b"`s(....)", re.DOTALL)

    # Accumulate the rewrite of `payload` here. We will eventually parse this as standard JSON.
    rewritten_payload = b""

    # The index of `payload` up to which we have processed (copied into `rewritten_payload`).
    last_processed_index = -1

    for x in length_prefix_re.finditer(payload):
        # First add in the bytes between the last processed and the start of this match.
        rewritten_payload += payload[last_processed_index + 1 : x.start(0)]
        # Read the 4 bytes that describe the length, which is stored in regex group 1.
        # 2->TODO struct.pack|unpack in python < 2.7.7 does not allow unicode format string.
        length = compat.struct_unpack_unicode(">i", x.group(1))[0]
        # Grab the string content as raw bytes.
        raw_string = payload[x.end(1) : x.end(1) + length]
        text_string = raw_string.decode("utf-8", "replace")
        rewritten_payload += scalyr_util.json_encode(text_string, binary=True)
        last_processed_index = x.end(1) + length - 1

    rewritten_payload += payload[last_processed_index + 1 : len(payload)]

    # Now convert all places where we do not have quotes around key names to have quotes.
    # This is pretty fragile.. we look for anything like
    #     foo:
    # and convert it to
    #     "foo":
    rewritten_payload = re.sub(b"([\\w\\-]+):", b'"\\1":', rewritten_payload)

    # NOTE: Special case for Windows where the path is C:\ which we don't want to convert.
    rewritten_payload = rewritten_payload.replace(b'"C":\\', b"C:\\")
    # Do the same for the lower-case drive letter.
    rewritten_payload = rewritten_payload.replace(b'"c":\\', b"c:\\")

    return scalyr_util.json_decode(rewritten_payload.decode("utf-8", "replace"))
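# A minimal, self-contained sketch of the "`sXXXX" length-prefix encoding that the parser
# above undoes: the "`s" marker is followed by a 4-byte big-endian length, then the raw
# string bytes (illustrative only, not the full Scalyr serialization format).
import struct


def length_prefix_encode(text):
    raw = text.encode("utf-8")
    # "`s" marker + 4-byte big-endian length + the raw UTF-8 bytes.
    return b"`s" + struct.pack(">i", len(raw)) + raw


encoded = length_prefix_encode("hello")
assert encoded[:2] == b"`s"
# The parser reads the next 4 bytes with ">i" to recover the string length.
assert struct.unpack(">i", encoded[2:6])[0] == 5
assert encoded[6:] == b"hello"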
def __runtest(library):
    original_lib = util.get_json_lib()
    self._setlib(library)

    try:
        text2 = util.json_encode(obj)
        self.assertEquals(
            sorted(six.ensure_text(text)),
            sorted(text2),
            "%s != %s" % (str(text), str(text2)),
        )

        obj2 = util.json_decode(text2)
        text3 = util.json_encode(obj2)
        self.assertEquals(
            sorted(six.ensure_text(text)),
            sorted(text3),
            "%s != %s" % (str(text), str(text3)),
        )

        obj3 = util.json_decode(text)
        self.assertEquals(obj3, obj)

        # Sanity test to ensure curly brace is always the last character when serializing
        # a dict.
        # Our "rewind to last curly brace" logic in scalyr_agent/scalyr_client.py relies on
        # this behavior.
        values = [
            {},
            {"a": "b"},
            {"a": 1, "b": 2},
        ]

        for value in values:
            result = util.json_encode(value)
            self.assertEqual(result[-1], "}")
    finally:
        util.set_json_lib(original_lib)
def test_non_ascii_data_non_utf_locale_coding_default_json_lib(self):
    util.set_json_lib("json")

    original_data = "čććžšđ"

    # Default (UTF-8) locale
    result = util.json_encode(original_data)
    self.assertEqual(result, '"\\u010d\\u0107\\u0107\\u017e\\u0161\\u0111"')

    loaded = util.json_decode(result)
    self.assertEqual(loaded, original_data)

    # Non UTF-8 locale
    os.environ["LC_ALL"] = "invalid"

    result = util.json_encode(original_data)
    self.assertEqual(result, '"\\u010d\\u0107\\u0107\\u017e\\u0161\\u0111"')

    loaded = util.json_decode(result)
    self.assertEqual(loaded, original_data)
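# Why the orjson test and the stdlib json test above expect different literals: the stdlib
# encoder escapes non-ASCII characters by default (ensure_ascii=True), while orjson emits
# raw UTF-8. A standalone illustration with the stdlib json module:
import json

assert json.dumps("čććžšđ") == '"\\u010d\\u0107\\u0107\\u017e\\u0161\\u0111"'
assert json.dumps("čććžšđ", ensure_ascii=False) == '"čććžšđ"'
# Both forms decode back to the same text, so round-tripping is unaffected.
assert json.loads('"\\u010d\\u0107\\u0107\\u017e\\u0161\\u0111"') == "čććžšđ"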
def format_msg(
    self,
    metric_name,
    metric_value,
    extra_fields=None,
):
    string_buffer = StringIO()
    string_buffer.write(
        "%s %s" % (metric_name, scalyr_util.json_encode(metric_value))
    )

    if extra_fields is not None:
        for field_name in extra_fields:
            field_value = extra_fields[field_name]
            string_buffer.write(
                " %s=%s" % (field_name, scalyr_util.json_encode(field_value))
            )

    msg = string_buffer.getvalue()
    string_buffer.close()

    return msg
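# Expected shape of the formatted line, sketched with the stdlib json module standing in
# for scalyr_util.json_encode (the metric name and field below are hypothetical examples):
import json

metric_name = "app.cpu_usage"
metric_value = 0.42
extra_fields = {"worker": "frontend-1"}

line = "%s %s" % (metric_name, json.dumps(metric_value))
for field_name, field_value in extra_fields.items():
    line += " %s=%s" % (field_name, json.dumps(field_value))

# Produces: app.cpu_usage 0.42 worker="frontend-1"
assert line == 'app.cpu_usage 0.42 worker="frontend-1"'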
def create_copying_manager(self, config):
    if "api_key" not in config:
        config["api_key"] = "fake"

    f = open(self._config_file, "w")
    if f:
        f.write(scalyr_util.json_encode(config))
        f.close()

    default_paths = DefaultPaths(self._log_dir, self._config_file, self._data_dir)

    configuration = Configuration(self._config_file, default_paths, None)
    configuration.parse()

    self._manager = TestableCopyingManager(configuration, [])
    self._controller = self._manager.controller
def create_copying_manager(self, config, monitor_agent_log=False):
    if "api_key" not in config:
        config["api_key"] = "fake"

    if not monitor_agent_log:
        config["implicit_agent_log_collection"] = False

    f = open(self._config_file, "w")
    if f:
        f.write(scalyr_util.json_encode(config))
        f.close()

    default_paths = DefaultPaths(self._log_dir, self._config_file, self._data_dir)

    configuration = Configuration(self._config_file, default_paths, None)
    configuration.parse()

    self._manager = TestableCopyingManager(configuration, [])
    self._controller = self._manager.controller
def _add_non_utf8_to_checkpoint_file(path):
    """
    Add an invalid UTF-8 byte to the checkpoint data stored in the file located at "path".
    """
    fp = open(path, "r")
    data = scalyr_util.json_decode(fp.read())
    fp.close()

    # 2->TODO: JSON libraries do not allow serializing byte strings with invalid UTF-8 (ujson),
    # or bytes at all (json), so to test this case we must write the non-UTF-8 byte directly,
    # without serializing. This placeholder string is replaced with an invalid UTF-8 byte after
    # encoding.
    data["test"] = "__replace_me__"
    json_string = scalyr_util.json_encode(data, binary=True)

    # Replace the prepared substring with the invalid byte.
    json_string = json_string.replace(b"__replace_me__", b"\x96")

    fp = open(path, "wb")
    fp.write(json_string)
    fp.close()
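# Standalone sketch of the placeholder trick above, using the stdlib json module: the
# encoder refuses bytes (and any invalid UTF-8), so the invalid byte is spliced into the
# already-encoded payload instead of being serialized.
import json

data = {"test": "__replace_me__"}
encoded = json.dumps(data).encode("utf-8")
corrupted = encoded.replace(b"__replace_me__", b"\x96")

assert b"\x96" in corrupted
# The result is no longer valid UTF-8, which is exactly what the test needs.
try:
    corrupted.decode("utf-8")
    raise AssertionError("expected invalid UTF-8")
except UnicodeDecodeError:
    pass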
def run(self):
    """Begins executing the monitor, writing metric output to logger."""
    if self.__disable_monitor:
        global_log.info(
            "kubernetes_events_monitor exiting because it has been disabled."
        )
        return

    try:
        self._global_config.k8s_api_url
        self._global_config.k8s_verify_api_queries

        # We only create the k8s_cache while we are the leader
        k8s_cache = None

        if self.__log_watcher:
            self.log_config = self.__log_watcher.add_log_config(
                self.module_name, self.log_config
            )

        # First instance of k8s api uses the main rate limiter. Leader election related API
        # calls to the k8s masters will go through this api/rate limiter.
        k8s_api_main = KubernetesApi.create_instance(
            self._global_config, rate_limiter_key="K8S_CACHE_MAIN_RATELIMITER"
        )

        # Second instance of k8s api uses an ancillary ratelimiter (for exclusive use by the
        # events monitor).
        k8s_api_events = KubernetesApi.create_instance(
            self._global_config, rate_limiter_key="K8S_EVENTS_RATELIMITER"
        )

        # k8s_cache is initialized with the main rate limiter. However, streaming-related API
        # calls should go through the ancillary ratelimiter. This is achieved by passing
        # ApiQueryOptions with the desired rate_limiter.
        k8s_events_query_options = ApiQueryOptions(
            max_retries=self._global_config.k8s_controlled_warmer_max_query_retries,
            rate_limiter=k8s_api_events.default_query_options.rate_limiter,
        )

        pod_name = k8s_api_main.get_pod_name()
        self._node_name = k8s_api_main.get_node_name(pod_name)
        cluster_name = k8s_api_main.get_cluster_name()

        last_event = None
        last_resource = 0

        last_check = time.time() - self._leader_check_interval

        last_reported_leader = None
        while not self._is_thread_stopped():
            current_time = time.time()

            # If we are the leader, we could be going through this loop before the
            # leader_check_interval has expired, so make sure to only check for a new leader
            # if the interval has expired.
            if last_check + self._leader_check_interval <= current_time:
                last_check = current_time

                # check if we are the leader
                if not self._is_leader(k8s_api_main):
                    # if not, then sleep and try again
                    global_log.log(
                        scalyr_logging.DEBUG_LEVEL_1,
                        "Leader is %s" % (six.text_type(self._current_leader)),
                    )
                    if (
                        self._current_leader is not None
                        and last_reported_leader != self._current_leader
                    ):
                        global_log.info(
                            "Kubernetes event leader is %s"
                            % six.text_type(self._current_leader)
                        )
                        last_reported_leader = self._current_leader
                    if k8s_cache is not None:
                        k8s_cache.stop()
                        k8s_cache = None
                    self._sleep_but_awaken_if_stopped(self._leader_check_interval)
                    continue

                global_log.log(
                    scalyr_logging.DEBUG_LEVEL_1,
                    "Leader is %s" % (six.text_type(self._current_leader)),
                )

            try:
                if last_reported_leader != self._current_leader:
                    global_log.info("Acting as Kubernetes event leader")
                    last_reported_leader = self._current_leader

                if k8s_cache is None:
                    # create the k8s cache
                    k8s_cache = k8s_utils.cache(self._global_config)

                # start streaming events
                lines = k8s_api_events.stream_events(last_event=last_event)

                json = {}
                for line in lines:
                    try:
                        json = scalyr_util.json_decode(line)
                    except Exception as e:
                        global_log.warning(
                            "Error parsing event json: %s, %s, %s"
                            % (line, six.text_type(e), traceback.format_exc())
                        )
                        continue

                    try:
                        # check to see if the resource version we are using has expired
                        if self._is_resource_expired(json):
                            last_event = None
                            global_log.log(
                                scalyr_logging.DEBUG_LEVEL_1, "K8S resource expired"
                            )
                            continue

                        obj = json.get("object", dict())
                        event_type = json.get("type", "UNKNOWN")

                        # resource version hasn't expired, so update it to the most recently
                        # seen version
                        last_event = last_resource

                        metadata = obj.get("metadata", dict())

                        # skip any events whose resourceVersion is not newer than ones we've
                        # already seen
                        resource_version = metadata.get("resourceVersion", None)
                        if resource_version is not None:
                            resource_version = int(resource_version)
                        if resource_version and resource_version <= last_resource:
                            global_log.log(
                                scalyr_logging.DEBUG_LEVEL_2,
                                "Skipping older resource events",
                            )
                            continue

                        last_resource = resource_version
                        last_event = resource_version

                        # see if this event is about an object we are interested in
                        (kind, namespace, name) = self._get_involved_object(obj)

                        if kind is None:
                            global_log.log(
                                scalyr_logging.DEBUG_LEVEL_1,
                                "Ignoring event due to None kind",
                            )
                            continue

                        # exclude any events that don't involve objects we are interested in
                        if (
                            self.__event_object_filter
                            and kind not in self.__event_object_filter
                        ):
                            global_log.log(
                                scalyr_logging.DEBUG_LEVEL_1,
                                "Ignoring event due to unknown kind %s - %s"
                                % (kind, six.text_type(metadata)),
                            )
                            continue

                        # ignore events that belong to namespaces we are not interested in
                        if namespace not in self.__k8s_namespaces_to_include:
                            global_log.log(
                                scalyr_logging.DEBUG_LEVEL_1,
                                "Ignoring event due to belonging to an excluded namespace '%s'"
                                % (namespace),
                            )
                            continue

                        # get cluster and deployment information
                        extra_fields = {
                            "k8s-cluster": cluster_name,
                            "watchEventType": event_type,
                        }
                        if kind:
                            if kind == "Pod":
                                extra_fields["pod_name"] = name
                                extra_fields["pod_namespace"] = namespace
                                pod = k8s_cache.pod(
                                    namespace,
                                    name,
                                    current_time,
                                    query_options=k8s_events_query_options,
                                )
                                if pod and pod.controller:
                                    extra_fields["k8s-controller"] = pod.controller.name
                                    extra_fields["k8s-kind"] = pod.controller.kind
                            elif kind != "Node":
                                controller = k8s_cache.controller(
                                    namespace,
                                    name,
                                    kind,
                                    current_time,
                                    query_options=k8s_events_query_options,
                                )
                                if controller:
                                    extra_fields["k8s-controller"] = controller.name
                                    extra_fields["k8s-kind"] = controller.kind

                        # if so, log to disk
                        self.__disk_logger.info(
                            "event=%s extra=%s"
                            % (
                                six.text_type(scalyr_util.json_encode(obj)),
                                six.text_type(scalyr_util.json_encode(extra_fields)),
                            )
                        )

                        # see if we need to check for a new leader
                        if last_check + self._leader_check_interval <= current_time:
                            global_log.log(
                                scalyr_logging.DEBUG_LEVEL_1,
                                "Time to check for a new event leader",
                            )
                            break
                    except Exception as e:
                        global_log.exception(
                            "Failed to process single k8s event line due to following exception: %s, %s, %s"
                            % (repr(e), six.text_type(e), traceback.format_exc()),
                            limit_once_per_x_secs=300,
                            limit_key="k8s-stream-events-general-exception",
                        )
            except K8sApiAuthorizationException:
                global_log.warning(
                    "Could not stream K8s events due to an authorization error. The "
                    "Scalyr Service Account does not have permission to watch available events. "
                    "Please recreate the role with the latest definition which can be found "
                    "at https://raw.githubusercontent.com/scalyr/scalyr-agent-2/release/k8s/scalyr-service-account.yaml "
                    "K8s event collection will be disabled until this is resolved. See the K8s install "
                    "directions for instructions on how to create the role "
                    "https://www.scalyr.com/help/install-agent-kubernetes",
                    limit_once_per_x_secs=300,
                    limit_key="k8s-stream-events-no-permission",
                )
            except ConnectionError:
                # ignore these, and just carry on querying in the next loop
                pass
            except Exception as e:
                global_log.exception(
                    "Failed to stream k8s events due to the following exception: %s, %s, %s"
                    % (repr(e), six.text_type(e), traceback.format_exc())
                )

        if k8s_cache is not None:
            k8s_cache.stop()
            k8s_cache = None

    except Exception:
        # TODO: Maybe remove this catch here and let the higher layer catch it. However, we
        # do not right now join on the monitor threads, so no one would catch it. We should
        # change that.
        global_log.exception(
            "Monitor died due to exception:", error_code="failedMonitor"
        )
def run_benchmark():
    return json_encode(data)