def test_simple_prototype_cache(self):
    """
    Check a prototype signature cache created by a one-signature ensemble.

    Every node of ``simple_unique_node_tree()`` is stored in the cache for
    ``ProcessExitEvent``. Before the first insertion ``cache.get`` is
    expected to return an empty list, afterwards its first element has to
    be a ``dict``. After each insertion the statistics stored under the key
    ``"value"`` must have counted at least one entry.

    The second half repeats the frequency check with a plain signature
    cache on ``simple_monitoring_tree()``.
    """
    signature = EnsembleSignature(
        signatures=[ParentChildByNameTopologySignature()])
    cache = signature.prototype_signature_cache_class(
        supported={ProcessExitEvent: True})

    tree = simple_unique_node_tree()
    for node in tree.nodes():
        tokens = signature.get_signature(node, parent=node.parent())
        if not cache.node_count():
            # nothing added yet
            self.assertEqual([], cache.get(signature=tokens))
        else:
            # assertIsInstance reports the offending object on failure
            self.assertIsInstance(cache.get(signature=tokens)[0], dict)
        cache[tokens, tree, ProcessExitEvent] = {"value": 1}
        stats = cache.get_statistics(
            signature=tokens,
            prototype=tree,
            key="value",
            event_type=ProcessExitEvent)
        # assertGreaterEqual shows the actual count when the check fails
        self.assertGreaterEqual(stats[0].count(), 1)
    self.assertEqual([4], cache.node_count())
    self.assertEqual([4], cache.multiplicity())

    signature = EnsembleSignature(
        signatures=[ParentChildByNameTopologySignature()])
    cache = signature.signature_cache_class()
    for node in simple_monitoring_tree().nodes():
        cache[signature.get_signature(node, parent=node.parent()),
              ProcessStartEvent] = {"value": 0}
    self.assertEqual([3], cache.node_count())
    self.assertEqual([4], cache.multiplicity())
def test_simple_cache(self):
    """
    Check a signature cache created by a one-signature ensemble.

    For ``simple_unique_node_tree()`` every signature is expected to have
    multiplicity ``[1]`` right after insertion and the cache ends with
    ``[4]`` nodes and a multiplicity of ``[4]``. For
    ``simple_monitoring_tree()`` the expected values are ``[3]`` nodes and
    a multiplicity of ``[4]``.
    """
    ensemble = EnsembleSignature(
        signatures=[ParentChildByNameTopologySignature()])
    signature_cache = ensemble.signature_cache_class()

    for current in simple_unique_node_tree().nodes():
        tokens = ensemble.get_signature(current, parent=current.parent())
        # a falsy node_count() means nothing has been added yet
        expected_before = [0] if signature_cache.node_count() else []
        self.assertEqual(
            expected_before, signature_cache.multiplicity(signature=tokens))
        signature_cache[tokens, ProcessStartEvent] = {"value": 0}
        self.assertEqual(
            [1], signature_cache.multiplicity(signature=tokens))
    self.assertEqual([4], signature_cache.node_count())
    self.assertEqual([4], signature_cache.multiplicity())

    ensemble = EnsembleSignature(
        signatures=[ParentChildByNameTopologySignature()])
    signature_cache = ensemble.signature_cache_class()
    for current in simple_monitoring_tree().nodes():
        tokens = ensemble.get_signature(current, parent=current.parent())
        signature_cache[tokens, ProcessStartEvent] = {"value": 0}
    self.assertEqual([3], signature_cache.node_count())
    self.assertEqual([4], signature_cache.multiplicity())
def test_two_prototype_ensembles(self):
    """
    Check a prototype signature cache created by a two-signature ensemble.

    Every node of ``simple_unique_node_tree()`` is stored for
    ``ProcessStartEvent``. Once the cache holds data, ``cache.get`` is
    expected to return two entries (one per ensemble member) whose first
    element is a ``dict``; after each insertion the statistics stored for
    the prototype under ``"value"`` must have counted at least one entry.
    """
    signature = EnsembleSignature(signatures=[
        ParentChildByNameTopologySignature(),
        ParentChildOrderTopologySignature()
    ])
    cache = signature.prototype_signature_cache_class()

    prototype = simple_unique_node_tree()
    for node in prototype.nodes():
        tokens = signature.get_signature(node, parent=node.parent())
        if not cache.node_count():
            # nothing added yet
            self.assertEqual([], cache.get(signature=tokens))
        else:
            received = cache.get(signature=tokens)
            self.assertEqual(2, len(received))
            # assertIsInstance reports the offending object on failure
            self.assertIsInstance(received[0], dict)
        cache[tokens, prototype, ProcessStartEvent] = {"value": 1}
        received = cache.get(signature=tokens)
        self.assertEqual(2, len(received))
        self.assertIsInstance(received[0], dict)
        # assertGreaterEqual shows the actual count when the check fails
        self.assertGreaterEqual(
            received[0][prototype][ProcessStartEvent]["value"].count(), 1)
    self.assertEqual([4, 4], cache.node_count())
    self.assertEqual([4, 4], cache.multiplicity())
def test_simple_signature(self):
    """
    Check that a one-signature ensemble yields exactly one token per node.

    The ensemble is first prepared with every node of
    ``simple_monitoring_tree()``; afterwards each node of a second,
    freshly built tree must produce a non-``None`` signature of length 1.
    """
    ensemble = EnsembleSignature(
        signatures=[ParentChildByNameTopologySignature()])
    self.assertEqual(1, ensemble.count)

    # first pass: let the ensemble see every node
    for prepared in simple_monitoring_tree().nodes():
        ensemble.prepare_signature(prepared, parent=prepared.parent())

    # second pass on a new tree instance: inspect the generated tokens
    for inspected in simple_monitoring_tree().nodes():
        token = ensemble.get_signature(inspected, parent=inspected.parent())
        self.assertIsNotNone(token)
        self.assertEqual(1, len(token))
def test_two_ensembles_different_frequency(self):
    """
    Check per-signature counts of a two-signature ensemble cache.

    After storing all nodes of ``simple_monitoring_tree()`` the expected
    node counts are ``[3, 4]`` while the multiplicity is ``[4, 4]``.
    """
    members = [
        ParentChildByNameTopologySignature(),
        ParentChildOrderTopologySignature()
    ]
    ensemble = EnsembleSignature(signatures=members)
    signature_cache = ensemble.signature_cache_class()

    for current in simple_monitoring_tree().nodes():
        tokens = ensemble.get_signature(current, parent=current.parent())
        signature_cache[tokens, ProcessStartEvent] = {"value": 0}

    self.assertEqual([3, 4], signature_cache.node_count())
    self.assertEqual([4, 4], signature_cache.multiplicity())
def test_two_prototype_ensembles_different_frequency(self):
    """
    Check per-signature counts of a two-signature prototype cache.

    All nodes of ``simple_monitoring_tree()`` are stored for
    ``ProcessExitEvent``; the expected node counts are ``[3, 4]`` while
    the multiplicity is ``[4, 4]``.
    """
    members = [
        ParentChildByNameTopologySignature(),
        ParentChildOrderTopologySignature()
    ]
    ensemble = EnsembleSignature(signatures=members)
    prototype_cache = ensemble.prototype_signature_cache_class(
        supported={ProcessExitEvent: True})

    prototype = simple_monitoring_tree()
    for current in prototype.nodes():
        tokens = ensemble.get_signature(current, parent=current.parent())
        prototype_cache[tokens, prototype, ProcessExitEvent] = {"value": 1}

    self.assertEqual([3, 4], prototype_cache.node_count())
    self.assertEqual([4, 4], prototype_cache.multiplicity())
def test_two_ensembles(self):
    """
    Check a signature cache created by a two-signature ensemble.

    For every node of ``simple_unique_node_tree()`` the multiplicity of its
    signature is expected to be ``[1, 1]`` right after insertion; the cache
    ends with ``[4, 4]`` nodes and a multiplicity of ``[4, 4]``.
    """
    members = [
        ParentChildByNameTopologySignature(),
        ParentChildOrderTopologySignature()
    ]
    ensemble = EnsembleSignature(signatures=members)
    signature_cache = ensemble.signature_cache_class()

    for current in simple_unique_node_tree().nodes():
        tokens = ensemble.get_signature(current, parent=current.parent())
        # a falsy node_count() means nothing has been added yet
        expected_before = [0, 0] if signature_cache.node_count() else []
        self.assertEqual(
            expected_before, signature_cache.multiplicity(signature=tokens))
        signature_cache[tokens, ProcessStartEvent] = {"value": 0}
        self.assertEqual(
            [1, 1], signature_cache.multiplicity(signature=tokens))

    self.assertEqual([4, 4], signature_cache.node_count())
    self.assertEqual([4, 4], signature_cache.multiplicity())
class TreeDistanceAlgorithm(object):
    """
    The class TreeDistanceAlgorithm creates the API for different algorithms
    calculating the distance between trees. The most important methods to
    consider for implementation of a new distance algorithm are

    * node_count_for_prototype,
    * prototypes_converted_for_algorithm,
    * start_tree and finish_tree,
    * _add_event, and
    * _update_distance

    NOTE(review): of the names listed above only ``start_tree`` and
    ``finish_tree`` are defined in this class; the distance hook that is
    actually called here is ``_update_distances`` -- confirm which names
    subclasses are expected to implement.
    """
    # "__dict__" is part of the slots, so instances can still carry
    # attributes that are not named explicitly here.
    __slots__ = ("_signature", "_cache_statistics", "_signature_prototypes",
                 "_distance", "_prototypes", "_tree", "_tree_dict",
                 "_event_counter", "supported", "_maxlen", "__dict__")

    def __init__(self, signature: Signature = None, cache_statistics=None):
        """
        Set up signature handling, caches and the table of supported events.

        :param signature: Signature to use; a plain ``Signature()`` is
            created when None. Anything that is not an
            ``EnsembleSignature`` is wrapped into one.
        :param cache_statistics: Passed as ``statistics_cls`` to the
            prototype signature cache; also used by ``__repr__``
        """
        if signature is None:
            signature = Signature()
        if not isinstance(signature, EnsembleSignature):
            self._signature = EnsembleSignature(signatures=[signature])
        else:
            self._signature = signature
        # signature caches
        self._cache_statistics = cache_statistics
        self._signature_prototypes = self._signature.prototype_signature_cache_class(
            statistics_cls=self._cache_statistics)
        self._distance = None
        self._prototypes: List[Prototype] = []
        self._tree: Tree = None
        self._tree_dict = ObjectCache()
        self._event_counter: int = 0
        # only process start events are handled by default
        self.supported: Dict[Event, bool] = {
            ProcessStartEvent: True,
            ProcessExitEvent: False,
            TrafficEvent: False,
            ParameterEvent: False
        }
        # None means that no limit is applied when slicing the prototypes
        self._maxlen: int = None

    @property
    def signature(self):
        """
        Property to access the signature that is used by
        TreeDistanceAlgorithm.

        :return: Current signature
        """
        return self._signature

    @property
    def tree(self):
        """
        Property to access the tree that is cached within the
        TreeDistanceAlgorithm.

        :return: Current tree
        """
        return self._tree

    @property
    def prototypes(self):
        """
        Property that gives access to the prototypes being used for distance
        calculations. The list is cut to the first ``_maxlen`` entries, the
        limit that is set by ``start_tree``.

        :return: List of prototypes
        """
        return self._prototypes[:self._maxlen]

    @prototypes.setter
    def prototypes(self, value=None):
        """
        Setter method to set the current list of prototypes to be used for
        distance measurements. A new prototype signature cache is created
        and each prototype is converted into it via ``to_prototype``.

        :param value: List of prototypes
        """
        # clean old prototypes first...
        self._signature_prototypes = self._signature.prototype_signature_cache_class(
            statistics_cls=self._cache_statistics, supported=self.supported)
        for prototype in value:
            # store links to nodes based on node_ids into dictionary
            prototype.to_prototype(signature=self.signature,
                                   supported=self.supported,
                                   cache=self._signature_prototypes)
        self._prototypes = value

    @property
    def signature_prototypes(self):
        """
        Method that returns the signatures of the current prototypes being
        used for distance measurements.

        :return: Signatures for all prototypes
        """
        return self._signature_prototypes

    def cluster_representatives(self, signature_prototypes=None, prototypes=None):
        """
        Method that sets the signatures and cluster names for cluster
        representatives to check. A list of caches is merged via
        ``from_prototype_signature_caches``; any other value is stored as is.

        :param signature_prototypes: Signature for all cluster representatives
        :param prototypes: Cluster name per cluster representative
        """
        # FIXME: here might be another implementation
        if isinstance(signature_prototypes, list):
            self._signature_prototypes = \
                self._signature.prototype_signature_cache_class.\
                from_prototype_signature_caches(signature_prototypes)
        else:
            self._signature_prototypes = signature_prototypes
        self._prototypes = prototypes

    def tree_node_counts(self, signature=False):
        """
        Returns the list of count of nodes for monitoring tree per prototype.
        If signature is True, the count of nodes based on signature is used.
        Signature defaults to False.

        Format is like this: [ve1, ..., ven]

        With ``signature=True`` the result of ``self.distance.node_count()``
        is returned unchanged. If that lookup raises AttributeError, or if
        ``signature`` is False, the node count of the tree is repeated once
        per signature of the ensemble; an empty list is returned when the
        count is not greater than 0.

        :param signature: Determines if node count depends on signature,
            defaults to False
        :return: List of counts for monitoring tree per prototype
        """
        if signature:
            try:
                return self.distance.node_count()
            except AttributeError:
                try:
                    count = self._tree.node_count()
                except AttributeError:
                    count = 0
        else:
            try:
                count = self._tree.node_count()
            except AttributeError:
                # no tree available (e.g. _tree is still None)
                count = 0
        return [count for _ in range(self._signature.count)] if count > 0 else []

    def prototype_node_counts(self, signature=False):
        """
        Returns the count of nodes per prototype tree. If signature is True,
        the count for converted prototypes based on given signature is
        returned. Signature defaults to False.

        Format is like this: [[e1p1, ... e1pn], ..., [enp1, ..., enpn]]

        :param signature: Determines if node count depends on signature,
            defaults to False
        :return: List of counts for prototypes, or None if a prototype does
            not offer ``node_count``
        """
        if signature:
            # transpose the per-prototype results into per-signature lists
            return [
                list(element) for element in zip(*[
                    self._signature_prototypes.node_count(prototype=prototype)
                    for prototype in self._prototypes
                ])
            ]
        try:
            return [[prototype.node_count() for prototype in self._prototypes]
                    for _ in range(self._signature.count)]
        except AttributeError:
            # working a Cluster Representative
            # TODO: clean this up a bit...
            pass
        return None

    def tree_event_counts(self):
        """
        Returns the event count of the monitoring tree, repeated once per
        signature of the ensemble.

        The count is taken from ``self.distance.node_count()``; if that
        raises AttributeError the node count of the tree is used, and 0 if
        that is not available either.

        NOTE(review): the value is compared with ``> 0`` -- confirm that
        ``distance.node_count()`` returns a number when called without
        arguments.

        :return: List of event counts, empty if the count is not positive
        """
        try:
            event_counts = self.distance.node_count()
        except AttributeError:
            try:
                event_counts = self._tree.node_count()
            except AttributeError:
                event_counts = 0
        return [event_counts for _ in range(self._signature.count)] \
            if event_counts > 0 else []

    def prototype_event_counts(self, by_event=False):
        """
        Method returns a list containing the events per prototype.

        Format is like this: [[e1p1, ..., e1pn], ..., [enp1, ..., enpn]]

        Falls back to ``_prototype_event_counts`` when the lookup on
        ``self.distance`` raises AttributeError.

        :param by_event: Forwarded to ``distance.node_count``
        :return: List of event counts per prototype
        """
        try:
            event_counts = self.distance.node_count(
                prototypes=self._prototypes,
                signature_prototypes=self._signature_prototypes,
                by_event=by_event)
        except AttributeError:
            return self._prototype_event_counts()
        # transpose the result returned by the distance
        return [list(element) for element in zip(*event_counts)]

    def node_counts(self):
        """
        Returns the result of ``_node_count``.

        :return: Whatever ``_node_count`` returns
        """
        return self._node_count()

    def event_counts(self, by_event=False):
        """
        Method returns a list containing the current events considered from
        the monitoring tree by prototype.

        Returned format looks like: [[e1p1, ..., e1pn], ..., [enp1, ..., enpn]]

        :param by_event: Forwarded to ``_event_count``
        :return: List of monitoring tree event counts per prototype
        """
        return self._event_count(by_event=by_event)

    def start_tree(self, maxlen=None, **kwargs):
        """
        Method that should be called before a new event stream is started.
        It takes care on initialising things: a new tree and object cache
        are created and the event counter is reset.

        :param maxlen: How many prototypes are considered for distance
            measurement. Must not exceed the number of stored prototypes.
        :param kwargs: Not used in this implementation
        """
        self._tree = Tree()
        self._tree_dict = ObjectCache()
        self._event_counter = 0
        assert maxlen is None or maxlen <= len(self._prototypes)
        self._maxlen = maxlen

    def finish_tree(self):
        """
        Method that should be called after the event stream has been
        finished. Some of the algorithms might rely on this method to be
        called.

        :return: Returns final distance after all events have been applied;
            this implementation returns None.
        """
        return None

    def add_events(self, eventgenerator, **kwargs):
        """
        Convenience method that takes an event generator and calls method
        add_event for each event that is yielded. Events that raise
        EventNotSupportedException are skipped.

        :param eventgenerator: Event generator yielding events.
        :param kwargs: Forwarded to ``add_event``
        """
        for event in eventgenerator:
            try:
                self.add_event(event, **kwargs)
            except EventNotSupportedException:
                pass

    def add_event(self, event, **kwargs):
        """
        Method to add an event. For each event the actual distance from the
        stream object to different prototypes are calculated. The calculated
        distance is returned.

        Format that can be expected: [[v1p1e1, ..., vnpne1], ..., [v1p1en, ..., vnpnen]]

        Attention: This format is put into a list, because of empty nodes that
        might be required to put into the current tree.

        The event counter is incremented for every call, including calls
        that end in an exception. A ProcessStartEvent that is not supported
        yields None; unsupported TrafficEvent and ParameterEvent instances
        as well as unknown event types raise EventNotSupportedException.

        :param event: The event to be added to the current distance measurement.
        :param kwargs: Forwarded to node creation and distance update
        :return: Returns the current distances after the event has been applied.
        :raises EventNotSupportedException: If the event cannot be handled
        """
        # TODO: why are we actually still getting EmptyProcessEvent instances here?
        result = None
        self._event_counter += 1
        if isinstance(event, ProcessStartEvent):
            if self.supported.get(ProcessStartEvent, False):
                # create node
                node, parent = self._create_node(event, **kwargs)
                signature = self._create_signature(node, parent)
                # added to keep information related signature for event
                event.signature = [signature]
                result = [self.update_distance(event, signature, **kwargs)]
        elif isinstance(event, ProcessExitEvent):
            # finish node to take care on empty nodes
            result = []
            event.signature = []
            node, parent = self._finish_node(event, **kwargs)
            signatures = self._create_signature_for_finished_node(node)
            # each signature returned for the finished node is replayed as a
            # synthetic start and/or exit event, depending on what is supported
            for signature in signatures:
                if self.supported.get(ProcessStartEvent, False):
                    start_event = ProcessStartEvent(event.tme, 0, event.pid)
                    start_event.signature = signature
                    result.append(
                        self.update_distance(start_event, signature, **kwargs))
                    event.signature.append(signature)
                if self.supported.get(ProcessExitEvent, False):
                    exit_event = ProcessExitEvent(event.tme, 0, event.pid, event.tme)
                    exit_event.signature = signature
                    result.append(
                        self.update_distance(exit_event, signature, **kwargs))
                    event.signature.append(signature)
            if self.supported.get(ProcessExitEvent, False):
                signature = self._create_signature(node, parent)
                # added to keep information related signature for event
                event.signature.append(signature)
                result.append(self.update_distance(event, signature, **kwargs))
        elif isinstance(event, TrafficEvent):
            if self.supported.get(TrafficEvent, False):
                result = self._process_parameter_event(event, **kwargs)
            else:
                raise EventNotSupportedException(event)
        elif isinstance(event, ParameterEvent):
            if self.supported.get(ParameterEvent, False):
                result = self._process_parameter_event(event, **kwargs)
            else:
                raise EventNotSupportedException(event)
        else:
            raise EventNotSupportedException(event)
        return result

    def _process_parameter_event(self, event, **kwargs):
        """
        Handles traffic and parameter events: a node is created or reused,
        its signature is stored on the event and the distance is updated.

        :param event: Event that was received
        :param kwargs: Additional parameters
        :return: List containing the updated distances
        """
        # create or reuse node
        node, parent = self._create_or_reuse_node(event, **kwargs)
        signature = self._create_signature(node, parent)
        # added to keep information related signature for event
        event.signature = [signature]
        return [self.update_distance(event, signature, **kwargs)]

    def _node_count(self):
        """
        Placeholder for the node count.

        :return: The ``NotImplemented`` constant (no exception is raised)
        """
        return NotImplemented

    def _event_count(self, by_event=False):
        """
        Method returns the current event count of the monitoring tree. The
        result of ``tree_event_counts`` is repeated once for every stored
        prototype (``_maxlen`` is not applied here).

        :param by_event: Not used in this implementation
        :return: Event count of monitoring tree
        """
        count = self.tree_event_counts()
        return [count for _ in range(len(self._prototypes))]

    def _prototype_event_counts(self):
        """
        Method returns the current event count per prototype, taken from
        ``node_count()`` of each prototype and repeated once per signature
        of the ensemble.

        List format is like this: [[p1, ..., pn], ..., [p1, ..., pn]]

        :return: List of event counts per prototype
        """
        return [[prototype.node_count() for prototype in self._prototypes]
                for _ in range(self._signature.count)]

    def _create_node(self, event, **kwargs):
        """
        Method to create a new node in the monitoring tree based on event data
        that was received. The new node is registered in the object cache
        under the pid of the event.

        :param event: Event that was received
        :param kwargs: Additional parameters
        :return: Tuple of created node and its parent
        :raises TreeNotStartedException: If adding the node raises
            AttributeError (e.g. no tree has been started)
        """
        try:
            parent = self._tree_dict.get_data(value=event.tme, key=event.ppid)
        except DataNotInCacheException:
            parent = None
        try:
            node = self._tree.add_node(event.name,
                                       parent=parent,
                                       tme=event.tme,
                                       pid=event.pid,
                                       ppid=event.ppid)
        except AttributeError:
            raise TreeNotStartedException()
        self._tree_dict.add_data(data=node, key=event.pid, value=event.tme)
        return node, parent

    def _create_or_reuse_node(self, event, **kwargs):
        """
        Method to create a new node or reuse an existing node in the
        monitoring tree based on event data that was received. A child of the
        parent with the same name as the event is reused; otherwise a new
        node is added and flagged via ``attribute = True``.

        NOTE(review): when the parent is not found in the cache it is set to
        None, and the following ``parent.children()`` call then ends in
        TreeNotStartedException -- confirm that this is intended.

        :param event: Event that was received
        :param kwargs: Additional parameters
        :return: Tuple of created node and its parent
        :raises TreeNotStartedException: If an AttributeError occurs while
            looking up children or adding the node
        """
        try:
            parent = self._tree_dict.get_data(value=event.tme, key=event.ppid)
            # walk upwards while the candidate's ppid equals the event's pid
            while parent.ppid == event.pid:
                parent = parent.parent()
        except DataNotInCacheException:
            parent = None
        try:
            for child in parent.children():
                if child.name == event.name:
                    # node does already exist and does not have to be created, reuse
                    node = child
                    break
            else:
                node = self._tree.add_node(event.name,
                                           parent=parent,
                                           tme=event.tme,
                                           pid=event.pid,
                                           ppid=event.ppid)
                node.attribute = True
                # attributes don't have to be remembered to be loaded from cache,
                # so skip this
        except AttributeError:
            raise TreeNotStartedException()
        return node, parent

    def _finish_node(self, event, **kwargs):
        """
        Method that finishes a node based on exit event. Both the node and
        its parent are looked up in the object cache.

        :param event: Event that was received
        :param kwargs: Additional parameters
        :return: Tuple of finished node and its parent
        :raises TreeNotStartedException: If the parent lookup raises
            AttributeError
        """
        try:
            parent = self._tree_dict.get_data(value=event.tme, key=event.ppid)
        except DataNotInCacheException:
            parent = None
        except AttributeError:
            raise TreeNotStartedException()
        # the node lookup is not guarded; its exceptions propagate unchanged
        node = self._tree_dict.get_data(value=event.tme, key=event.pid)
        return node, parent

    def _create_signature_for_finished_node(self, node):
        """
        Method to create the signature for finished nodes (missing nodes for
        windowed signatures).

        :param node: The node that has been finished
        :return: Result of ``finish_node`` of the signature; ``add_event``
            iterates over it
        """
        return self._signature.finish_node(node)

    def _create_signature(self, node, parent):
        """
        Method to create the signature of a node whilst considering its parent.

        :param node: The node to create the signature for
        :param parent: The nodes parent
        :return: Calculated signature
        """
        return self._signature.get_signature(node, parent)

    def update_distance(self, event, signature, **kwargs):
        """
        Method to update the current distance based on the received event and
        the associated signature. Delegates to ``_update_distances``.

        :param event: Event that was received
        :param signature: Associated signature
        :param kwargs: Additional parameters
        :return: Updated distances
        """
        return self._update_distances(event, signature, **kwargs)

    def _update_distances(self, event, signature, **kwargs):
        """
        Method to be overwritten to update distances.

        :param event: Event that was received
        :param signature: Associated signature
        :param kwargs: Additional parameters
        :return: Updated distances
        :raises NotImplementedError: Always, in this implementation
        """
        raise NotImplementedError

    def __repr__(self):
        """
        Returns the class name together with the name of the cache
        statistics class (or None) and the names of all enabled event types.
        """
        return "%s (cache_statistics=%s, supported=%s)" % \
               (self.__class__.__name__,
                self._cache_statistics.__name__ if self._cache_statistics else None,
                [key.__name__ for key, value in self.supported.items() if value])

    def __getstate__(self):
        """
        Returns a copy of ``__dict__`` in which prototypes, prototype
        signatures, tree and tree cache are replaced by empty objects.

        NOTE(review): attributes named in ``__slots__`` are not stored in
        ``__dict__`` -- verify that restoring this state yields a usable
        object.

        :return: Dictionary describing the state
        """
        obj_dict = self.__dict__.copy()
        obj_dict["_prototypes"] = []
        obj_dict["_signature_prototypes"] = type(self._signature_prototypes)()
        obj_dict["_tree"] = Tree()
        obj_dict["_tree_dict"] = type(self._tree_dict)()
        return obj_dict