def add_event_listener(
    event_type: EventType,
    listener: Callable,
    blocking: bool = False,
    timeout: int = 900,
    one_shot: bool = False,
) -> bool:
    """Register `listener` for events of type `event_type`.

    The listener is stored in the module-level `_events` registry together
    with its settings, a fresh `Lock` and the PID of the registering process.

    Args:
        event_type: Event type the listener is registered for.
        listener: Callable to register.
        blocking: Stored under the "blocking" key of the listener entry.
        timeout: Stored under the "timeout" key of the listener entry.
        one_shot: Stored under the "one-shot" key of the listener entry.

    Returns:
        True if the listener was newly registered, False if it is not
        callable or was already registered for `event_type`.
    """
    if not callable(listener):
        log.error(
            f"Error registering {listener} of type {type(listener)} with event"
            f" {event_type.name}"
        )
        return False

    log.debug(
        f"Registering {listener} with event {event_type.name}"
        f" (blocking: {blocking}, one-shot: {one_shot})"
    )

    with _events_lock.write_access:
        newly_added = not event_listener_registered(event_type, listener)
        if newly_added:
            listener_settings = {
                "blocking": blocking,
                "timeout": timeout,
                "one-shot": one_shot,
                "lock": Lock(),
                "pid": os.getpid(),
            }
            _events[event_type][listener] = listener_settings
    return newly_added
def __delitem__(self, key):
    """Delete tag `key`, asking the parent resource to delete it first.

    Without a parent resource of type `BaseResource` the key is simply
    removed from the underlying mapping. Otherwise the parent's
    `delete_tag(key)` is called; only when it returns a truthy value is the
    key removed locally and "tags" recorded in the parent's `_changes`.

    Exceptions raised along the way are logged on the parent resource and
    re-raised only if the parent's `_raise_tags_exceptions` is truthy.
    """
    parent = self.parent_resource
    if not (parent and isinstance(parent, BaseResource)):
        return super().__delitem__(key)

    log.debug(f"Calling parent resource to delete tag {key} in cloud")
    try:
        if parent.delete_tag(key):
            log_msg = f"Successfully deleted tag {key} in cloud"
            parent._changes.add("tags")
            parent.log(log_msg)
            log.info((f"{log_msg} for {parent.kind}" f" {parent.id}"))
            # Kept inside the try block so a failing local delete is
            # handled by the except clause below as well.
            return super().__delitem__(key)

        log_msg = f"Error deleting tag {key} in cloud"
        parent.log(log_msg)
        log.error((f"{log_msg} for {parent.kind}" f" {parent.id}"))
    except Exception as e:
        log_msg = (
            f"Unhandled exception while trying to delete tag {key} in cloud:"
            f" {type(e)} {e}"
        )
        parent.log(log_msg, exception=e)
        if parent._raise_tags_exceptions:
            raise
        log.exception(log_msg)
def core_actions_processor(metrics: Metrics, search_uri: str, tls_data: TLSData, message: dict) -> None:
    """Handle a core action message and build the reply for it.

    Only messages of kind "action" are processed. For message type
    "generate_metrics" the metrics are refreshed via `update_metrics`; any
    other message type is treated as an error.

    Note: despite the `None` annotation, a reply dict with the keys "kind"
    ("action_done" or "action_error"), "message_type" and "data" is returned
    for "action" messages. Non-dict messages and other kinds yield None.
    """
    if not isinstance(message, dict):
        log.error(f"Invalid message: {message}")
        return

    kind = message.get("kind")
    message_type = message.get("message_type")
    data = message.get("data")
    log.debug(
        f"Received message of kind {kind}, type {message_type}, data: {data}")

    if kind != "action":
        return

    try:
        if message_type != "generate_metrics":
            raise ValueError(f"Unknown message type {message_type}")
        started = time.time()
        update_metrics(metrics, search_uri, tls_data)
        elapsed = time.time() - started
        log.debug(f"Updated metrics for {elapsed:.2f} seconds")
        reply_kind = "action_done"
    except Exception as e:
        log.exception(f"Failed to {message_type}: {e}")
        reply_kind = "action_error"

    return {
        "kind": reply_kind,
        "message_type": message_type,
        "data": data,
    }
def handler(sig, frame) -> None:
    """Signal handler that turns a shutdown signal into a SHUTDOWN event.

    In the process whose PID equals `parent_pid` the event is dispatched and
    the handler returns. In any other process a thread named "shutdown"
    running `delayed_exit` is started, the event is dispatched without
    blocking and the process exits with status 0.
    """
    current_pid = os.getpid()

    if current_pid == parent_pid:
        reason = f"Received shutdown signal {sig}"
        log.debug(f"Parent caught signal {sig} - dispatching shutdown event")
        # Dispatch shutdown event in parent process which also causes SIGTERM
        # to be sent to the process group and in turn causes the shutdown
        # event in all child processes.
        parent_event = Event(
            EventType.SHUTDOWN, {"reason": reason, "emergency": False}
        )
        dispatch_event(parent_event)
        return

    reason = f"Received shutdown signal {sig} from parent process"
    log.debug(
        f"Child with PID {current_pid} shutting down"
        " - you might see exceptions from interrupted worker threads"
    )
    # Child's threads have 3s to shut down before the following thread will
    # shut them down hard.
    exit_thread = threading.Thread(target=delayed_exit, name="shutdown")
    exit_thread.start()

    # Dispatch shutdown event in child process
    child_event = Event(
        EventType.SHUTDOWN, {"reason": reason, "emergency": False}
    )
    dispatch_event(child_event, blocking=False)
    sys.exit(0)
def pre_delete(self, graph: Graph) -> bool:
    """Revoke group-pair permissions of this security group before deletion.

    Every ingress and egress permission carrying a non-empty
    "UserIdGroupPairs" entry is deep-copied and revoked.

    Returns:
        Always True.
    """
    ec2 = aws_resource(self, "ec2", graph)
    security_group = ec2.SecurityGroup(self.id)

    ingress_to_revoke = []
    for permission in security_group.ip_permissions:
        if "UserIdGroupPairs" not in permission or len(permission["UserIdGroupPairs"]) == 0:
            continue
        p = copy.deepcopy(permission)
        ingress_to_revoke.append(p)
        log.debug(f"Adding incoming permission {p} of {self.kind} {self.dname} to removal list")

    egress_to_revoke = []
    for permission in security_group.ip_permissions_egress:
        if "UserIdGroupPairs" not in permission or len(permission["UserIdGroupPairs"]) == 0:
            continue
        p = copy.deepcopy(permission)
        egress_to_revoke.append(p)
        log.debug(f"Adding outgoing permission {p} of {self.kind} {self.dname} to removal list")

    # Only call the revoke APIs when there is something to revoke.
    if ingress_to_revoke:
        security_group.revoke_ingress(IpPermissions=ingress_to_revoke)
    if egress_to_revoke:
        security_group.revoke_egress(IpPermissions=egress_to_revoke)
    return True
def pre_delete(self, graph: Graph) -> bool:
    """Disassociate this resource's address association, if it has one.

    Returns:
        Always True, whether or not an association existed.
    """
    if self.association_id is None:
        log.debug(f"No association for {self.rtdname}")
        return True

    ec2 = aws_client(self, "ec2", graph=graph)
    ec2.disassociate_address(AssociationId=self.association_id)
    return True
def action_processor(self, message: Dict) -> None:
    """Process an incoming action message and build the reply for it.

    Only messages of kind "action" are processed. When the message type
    equals `self.action`, `self.do_action` is invoked with the message data;
    any other message type is treated as an error.

    Note: despite the `None` annotation, a reply dict with the keys "kind"
    ("action_done" or "action_error"), "message_type" and "data" is returned
    for "action" messages. Non-dict messages and other kinds yield None.
    """
    if not isinstance(message, dict):
        log.error(f"Invalid message: {message}")
        return

    kind = message.get("kind")
    message_type = message.get("message_type")
    data = message.get("data")
    log.debug(
        f"Received message of kind {kind}, type {message_type}, data: {data}"
    )

    if kind != "action":
        return

    try:
        if message_type != self.action:
            raise ValueError(f"Unknown message type {message_type}")
        started = time.time()
        self.do_action(data)
        elapsed = int(time.time() - started)
        log.debug(f"{self.action} ran for {elapsed} seconds")
        reply_kind = "action_done"
    except Exception as e:
        log.exception(f"Failed to {message_type}: {e}")
        reply_kind = "action_error"

    return {
        "kind": reply_kind,
        "message_type": message_type,
        "data": data,
    }
def do_action(self, data: Dict) -> None:
    """Protect every resource listed in the protector plugin config.

    The config is validated, copied to `self.config` and walked as
    cloud -> account -> region -> kind -> resource ids. One search term is
    built per resource and all terms are OR-ed into a single
    `search ... | protect` command that is executed against the core.

    Each search term filters on the account id as well, so that a resource
    with the same id, kind and region in a different account of the same
    cloud is not protected by accident. If the config lists no resources,
    no command is sent at all.

    Args:
        data: Action payload; not used by this action.
    """
    log.info("Protector called")
    Config.plugin_protector.validate(Config.plugin_protector)
    self.config = deepcopy(Config.plugin_protector.config)

    cg = CoreGraph(tls_data=self.tls_data)
    resource_parts = []
    for cloud_id, accounts in self.config.items():
        for account_id, regions in accounts.items():
            for region_id, kinds in regions.items():
                for kind, resources in kinds.items():
                    for resource_id in resources:
                        log.debug(
                            f"Protecting {resource_id} of kind {kind} in"
                            f" region {region_id} account {account_id}"
                            f" cloud {cloud_id}"
                        )
                        resource_parts.append(
                            f'(/reported.id == "{resource_id}"'
                            f' and /reported.kind == "{kind}"'
                            f' and /ancestors.account.reported.id == "{account_id}"'
                            f' and /ancestors.region.reported.id == "{region_id}"'
                            f' and /ancestors.cloud.reported.id == "{cloud_id}")'
                        )

    if not resource_parts:
        # An empty filter would produce the degenerate command
        # "search  | protect"; there is nothing to protect, so stop here.
        log.debug("No resources configured for protection")
        return

    resource_part = " or ".join(resource_parts)
    command = f"search {resource_part} | protect"
    for node_data in cg.execute(command):
        node = node_from_dict(node_data)
        log.debug(f"Protected {node.rtdname}")
def shutdown(self, event: Event = None) -> None:
    """Signal shutdown and close the websocket if one is set.

    Args:
        event: The triggering event; not used.
    """
    log.debug(
        "Received shutdown event - shutting down resotocore task queue listener"
    )
    self.shutdown_event.set()

    websocket_app = self.ws
    if not websocket_app:
        return
    websocket_app.close()
def do_action(self, data: Dict) -> None:
    """Mark resources that lack required tags for cleanup.

    Builds a single `query ... | clean` command from the cleanup-untagged
    plugin config (its "tags", "kinds" and per-cloud/per-account "age"
    entries), executes it against the core and logs every returned node.

    Args:
        data: Action payload; not used by this action.
    """
    log.debug("Cleanup Untagged called")
    cg = CoreGraph(tls_data=self.tls_data)
    config = deepcopy(Config.plugin_cleanup_untagged.config)

    quoted_tags = '", "'.join(config["tags"])
    tags_part = 'not(has_key(tags, ["' + quoted_tags + '"]))'
    quoted_kinds = '", "'.join(config["kinds"])
    kinds_part = 'is(["' + quoted_kinds + '"])'

    # One "(cloud and account and age)" term per configured account.
    account_parts = []
    for cloud_id, cloud_accounts in config["accounts"].items():
        account_parts.extend(
            f'(/ancestors.cloud.id == "{cloud_id}" and '
            f'/ancestors.account.id == "{account_id}" and '
            f"age > {delta_to_str(account_data.get('age'))})"
            for account_id, account_data in cloud_accounts.items()
        )
    accounts_part = "(" + " or ".join(account_parts) + ")"

    exclusion_part = "/metadata.protected == false and /metadata.phantom == false and /metadata.cleaned == false"
    required_tags = ", ".join(config["tags"])
    reason = (
        f"Missing one or more of required tags {required_tags}"
        " and age more than threshold"
    )
    command = f'query {exclusion_part} and {kinds_part} and {tags_part} and {accounts_part} | clean "{reason}"'

    for node_data in cg.execute(command):
        node = node_from_dict(node_data)
        log.debug(
            f"Marking {node.rtdname} with age {node.age} for cleanup for"
            f" missing one or more of tags: {required_tags}"
        )
def __core_metadata(
    client: ResotoClient,
) -> Tuple[List[CommandInfo], List[str], List[str]]:
    """Fetch command infos, kind names and property names from the core.

    Only kinds whose `properties` is not None contribute to the kind and
    property name lists.

    Returns:
        A tuple `(commands, sorted kind names, sorted property names)`. If
        anything fails, a warning is logged and three empty lists are
        returned instead.
    """
    try:
        log.debug("Fetching core metadata..")
        model = client.model()

        known_kinds = set()
        known_props = set()
        for kind_name, kind in model.kinds.items():
            if kind.properties is None:
                continue
            known_kinds.add(kind_name)
            known_props.update(prop.name for prop in kind.properties)

        info = client.cli_info()
        cmds = [jsons.load(cmd, CommandInfo) for cmd in info.get("commands", [])]
        return cmds, sorted(known_kinds), sorted(known_props)
    except Exception as ex:
        log.warning(
            f"Can not load metadata from core: {ex}. No suggestions as fallback.",
            exc_info=ex,
        )
        return [], [], []
def collect_account(
    account: AWSAccount,
    regions: List,
    args: Namespace = None,
    running_config: RunningConfig = None,
) -> Graph:
    """Collect a single AWS account and return the collector's graph.

    Sets the thread name to `aws_<account id>`, optionally installs the given
    parsed arguments (and sets up the logger) and applies the given running
    config before starting the collection.

    Errors raised by the collection are logged and counted in
    `metrics_unhandled_account_exceptions`; they are not propagated.

    Returns:
        The `graph` of the account collector, whether or not the collection
        succeeded.
    """
    resotolib.proc.set_thread_name(f"aws_{account.id}")

    if args is not None:
        ArgumentParser.args = args
        setup_logger("resotoworker-aws")
    if running_config is not None:
        Config.running_config.apply(running_config)

    log.debug(f"Starting new collect process for account {account.dname}")

    aac = AWSAccountCollector(regions, account)
    try:
        aac.collect()
    except Exception as e:
        # AWS client errors get a message that includes the AWS error code.
        if isinstance(e, botocore.exceptions.ClientError):
            log.exception(f"An AWS {e.response['Error']['Code']} error occurred while collecting account {account.dname}")
        else:
            log.exception(f"An unhandled error occurred while collecting AWS account {account.dname}")
        metrics_unhandled_account_exceptions.labels(account=account.dname).inc()
    return aac.graph
def kill_children(signal: Signals = SIGTERM, ensure_death: bool = False, timeout: int = 3) -> None:
    """Send `signal` to all (recursive) child processes of this process.

    Children that have already exited by the time they are signalled are
    skipped instead of aborting the signalling of the remaining children.

    Args:
        signal: Signal to send. SIGTERM is delivered via `terminate()`, any
            other signal via `send_signal()`.
        ensure_death: If True, wait up to `timeout` seconds for the children
            to exit and kill those that are still alive afterwards.
        timeout: Seconds to wait for the children when `ensure_death` is set.
    """
    procs = psutil.Process().children(recursive=True)
    num_children = len(procs)
    if num_children == 0:
        return

    log_suffix = "" if num_children == 1 else "ren"
    log.debug(f"Sending {signal.name} to {num_children} child{log_suffix}.")

    for p in procs:
        try:
            if signal == SIGTERM:
                p.terminate()
            else:
                p.send_signal(signal)
        except psutil.NoSuchProcess:
            # The child exited between enumeration and signalling.
            log.debug(f"Child with PID {p.pid} already exited")

    if ensure_death:
        _, alive = psutil.wait_procs(procs, timeout=timeout)
        for p in alive:
            log.debug(
                f"Child with PID {p.pid} is still alive, sending SIGKILL")
            try:
                p.kill()
            except psutil.NoSuchProcess:
                # The child exited after wait_procs() returned.
                log.debug(f"Child with PID {p.pid} already exited")
def connect(self) -> None:
    """Open the work queue websocket and run it until it returns.

    The websocket URI is derived from `self.resotocore_ws_uri` by appending
    "/work/queue" to its path and adding the tasks plus the task queue
    filter as query parameters. If a PSK is configured in the parsed
    arguments, it is used to add JWT data to the request headers. If TLS
    data is present, its CA certificate path is passed as SSL option.
    """
    uri_parts = urlsplit(self.resotocore_ws_uri)

    # Filter entries are applied after "task" and override it on a key clash.
    query_dict = {
        "task": ",".join(self.tasks),
        **{k: ",".join(v) for k, v in self.task_queue_filter.items()},
    }
    ws_uri = urlunsplit(
        (
            uri_parts.scheme,
            uri_parts.netloc,
            uri_parts.path + "/work/queue",
            urlencode(query_dict),
            "",
        )
    )
    log.debug(f"{self.identifier} connecting to {ws_uri}")

    headers = {}
    if getattr(ArgumentParser.args, "psk", None):
        encode_jwt_to_headers(headers, {}, ArgumentParser.args.psk)

    self.ws = websocket.WebSocketApp(
        ws_uri,
        header=headers,
        on_open=self.on_open,
        on_message=self.on_message,
        on_error=self.on_error,
        on_close=self.on_close,
        on_ping=self.on_ping,
        on_pong=self.on_pong,
    )

    sslopt = {"ca_certs": self.tls_data.ca_cert_path} if self.tls_data else None
    self.ws.run_forever(
        sslopt=sslopt,
        ping_interval=30,
        ping_timeout=10,
        ping_payload="ping",
    )
def add_plugin_config(self, config: Config) -> None:
    """Let every known plugin add its config to `config`.

    Plugins are discovered first via `find_plugins()` if the module-level
    `initialized` flag is not set.
    """
    if not initialized:
        self.find_plugins()
    log.debug("Adding plugin config")

    # Flatten the per-type plugin lists, keeping their iteration order.
    all_plugins = (
        plugin_cls
        for type_plugins in plugins.values()
        for plugin_cls in type_plugins
    )
    for plugin_cls in all_plugins:
        plugin_cls.add_config(config)
def search(self, search: str, edge_type: Optional[EdgeType] = None):
    """POST a search to the search endpoint and return the result of `post`.

    Args:
        search: The search string sent as request body.
        edge_type: If given, its value is appended to the endpoint as the
            `edge_type` query parameter.
    """
    log.debug(f"Sending search {search}")
    headers = {"Accept": "application/x-ndjson"}

    if edge_type is None:
        search_endpoint = self.search_uri
    else:
        query_string = urlencode({"edge_type": edge_type.value})
        search_endpoint = self.search_uri + f"?{query_string}"

    return self.post(search_endpoint, search, headers, verify=self.verify)
def __iter__(self):
    """Yield one encoded, newline-terminated JSON document per changed node.

    Nodes whose `changes.changed` is falsy are skipped. Each remaining node
    is converted with `node_to_dict(node, changes_only=True)`.
    """
    for node in self.graph.nodes:
        if node.changes.changed:
            node_dict = node_to_dict(node, changes_only=True)
            payload = f"{json.dumps(node_dict)}\n"
            log.debug(f"Updating node {node_dict}")
            yield payload.encode()
def regions(self) -> List:
    """Return the regions to use, resolving and caching them on first use.

    If no regions are cached yet, they are taken from `Config.aws.region`;
    when that is unset or an empty list, `all_regions()` is used instead.
    """
    if len(self.__regions) > 0:
        return self.__regions

    configured = Config.aws.region
    nothing_configured = not configured or (
        isinstance(configured, list) and len(configured) == 0
    )
    if nothing_configured:
        log.debug("AWS region not specified, assuming all regions")
        self.__regions = all_regions()
    else:
        self.__regions = list(configured)
    return self.__regions
def get_configs(resotocore_uri: str = None, psk: str = None, verify: Optional[str] = None) -> List:
    """Fetch the `/configs` endpoint of the core and return its JSON body.

    Args:
        resotocore_uri: Core URI; resolved through `default_args`.
        psk: Pre-shared key; resolved through `default_args`.
        verify: Passed through to `requests.get` as `verify`.

    Raises:
        RuntimeError: If the response status code is not 200.
    """
    resotocore_uri, psk, headers = default_args(resotocore_uri, psk)
    log.debug("Getting configs")

    response = requests.get(
        f"{resotocore_uri}/configs", headers=headers, verify=verify
    )
    if response.status_code != 200:
        raise RuntimeError(
            f"Error getting configs: {response.content.decode('utf-8')}"
        )
    return response.json()
def clean(self, value: bool) -> None:
    """Set whether this resource is to be cleaned.

    The change is logged and "clean" is recorded in `self._changes`.

    Raises:
        ValueError: If the resource is a phantom resource and `value` is
            truthy.
    """
    if self.phantom and value:
        raise ValueError(f"Can't cleanup phantom resource {self.rtdname}")

    clean_str = "not " if not value else ""
    self.log(f"Setting to {clean_str}be cleaned")
    log.debug(f"Setting {self.rtdname} to {clean_str}be cleaned")

    self._changes.add("clean")
    self._clean = value
def add_plugin_args(self, arg_parser: ArgumentParser) -> None:
    """Let every known plugin add its args to `arg_parser`.

    Plugins are discovered first via `find_plugins()` if the module-level
    `initialized` flag is not set.
    """
    if not initialized:
        self.find_plugins()
    log.debug("Adding plugin args")

    # Flatten the per-type plugin lists, keeping their iteration order.
    all_plugins = (
        plugin_cls
        for type_plugins in plugins.values()
        for plugin_cls in type_plugins
    )
    for plugin_cls in all_plugins:
        plugin_cls.add_args(arg_parser)
def dispatch_event(event: Event, blocking: bool = False) -> None:
    """Dispatch an Event to the listeners registered for its type.

    Every listener registered by the current process is called in its own
    thread. One-shot listeners are guarded by their lock, removed from the
    registry once their thread was started and their lock is released again.
    Cleanup of a one-shot listener only happens if this call acquired the
    listener's lock itself; listeners skipped because of a PID mismatch or a
    lock held elsewhere are left untouched.

    Args:
        event: The event to dispatch; passed to each listener.
        blocking: If True, wait for all started listener threads. Otherwise
            only the threads of listeners registered as blocking are waited
            for. Each wait is bounded by the listener's timeout, measured
            from the start of the wait phase, with a minimum of one second.
    """
    waiting_str = "" if blocking else "not "
    log.debug(
        f"Dispatching event {event.event_type.name} and {waiting_str}waiting for"
        " listeners to return"
    )

    with _events_lock.read_access:
        # The lookup happens under the read lock so the registry cannot
        # change between the membership check and the copy.
        registered = _events.get(event.event_type)
        if registered is None:
            return
        # Event listeners might unregister themselves during event dispatch
        # so we will work on a shallow copy while processing the current event.
        listeners = dict(registered)

    threads = {}
    for listener, listener_data in listeners.items():
        # True only if this call acquired the one-shot lock of this listener.
        lock_acquired = False
        try:
            if listener_data["pid"] != os.getpid():
                continue

            if listener_data["one-shot"]:
                lock_acquired = listener_data["lock"].acquire(blocking=False)
                if not lock_acquired:
                    log.error(
                        f"Not calling one-shot listener {listener} of type"
                        f" {type(listener)} - can't acquire lock"
                    )
                    continue

            log.debug(
                f"Calling listener {listener} of type {type(listener)}"
                f" (blocking: {listener_data['blocking']})"
            )
            thread_name = (
                f"{event.event_type.name.lower()}_event"
                f"-{getattr(listener, '__name__', 'anonymous')}"
            )
            t = Thread(target=listener, args=[event], name=thread_name)
            t.start()
            # Only track threads that were started, joining an unstarted
            # thread would raise.
            if blocking or listener_data["blocking"]:
                threads[t] = listener
        except Exception:
            log.exception("Caught unhandled event callback exception")
        finally:
            if lock_acquired:
                log.debug(
                    f"One-shot specified for event {event.event_type.name} "
                    f"listener {listener} - removing event listener"
                )
                remove_event_listener(event.event_type, listener)
                listener_data["lock"].release()

    start_time = time.time()
    for thread, listener in threads.items():
        timeout = start_time + listeners[listener]["timeout"] - time.time()
        if timeout < 1:
            timeout = 1
        log.debug(
            f"Waiting up to {timeout:.2f}s for event listener {thread.name} to finish"
        )
        thread.join(timeout)
        log.debug(
            f"Event listener {thread.name} finished (timeout: {thread.is_alive()})"
        )
def run(self) -> None:
    """Keep connecting to the event bus until shutdown is signalled.

    Registers `self.shutdown` as SHUTDOWN event listener, then calls
    `self.connect()` in a loop. Exceptions raised by `connect()` are logged
    and the loop sleeps for one second before the next attempt.
    """
    self.name = "eventbus-listener"
    add_event_listener(EventType.SHUTDOWN, self.shutdown)

    while True:
        if self.shutdown_event.is_set():
            break
        log.debug("Connecting to resotocore event bus")
        try:
            self.connect()
        except Exception as e:
            log.error(e)
        time.sleep(1)
def pre_delete(self, graph: Graph) -> bool:
    """Delete all non-main associations of this route table.

    Returns:
        Always True.
    """
    ec2 = aws_resource(self, "ec2", graph)
    route_table = ec2.RouteTable(self.id)

    for association in route_table.associations:
        if association.main:
            continue
        log_msg = f"Deleting route table association {association.id}"
        self.log(log_msg)
        log.debug(f"{log_msg} for cleanup of {self.kind} {self.dname}")
        association.delete()
    return True
def on_config_event(self, message: Dict[str, Any]) -> None:
    """Reload the config when a matching "config-updated" message arrives.

    A reload is triggered only if the message type is "config-updated", the
    message data's "id" equals `self.config_name` and its "revision" differs
    from the running config's revision. Reload failures are logged and not
    propagated.
    """
    if message.get("message_type") != "config-updated":
        return

    data = message.get("data", {})
    if data.get("id") != self.config_name:
        return
    if data.get("revision") == Config.running_config.revision:
        return

    try:
        log.debug(f"Config {self.config_name} has changed - reloading")
        self.load_config(reload=True)
    except Exception:
        log.exception("Failed to reload config")
def pre_delete(self, graph: Graph) -> bool:
    """Detach this internet gateway from its predecessor VPCs.

    Only predecessors along delete edges that are `AWSVPC` instances are
    detached.

    Returns:
        Always True.
    """
    ec2 = aws_resource(self, "ec2", graph)
    internet_gateway = ec2.InternetGateway(self.id)

    for vpc in self.predecessors(graph, edge_type=EdgeType.delete):
        if not isinstance(vpc, AWSVPC):
            continue
        log_msg = f"Detaching {vpc.kind} {vpc.dname}"
        self.log(log_msg)
        log.debug(f"{log_msg} for deletion of {self.kind} {self.dname}")
        internet_gateway.detach_from_vpc(VpcId=vpc.id)
    return True
def pre_delete(self, graph: Graph) -> bool:
    """Remove predecessor roles from this instance profile.

    Only predecessors along delete edges that are `AWSIAMRole` instances are
    removed.

    Returns:
        Always True.
    """
    iam = aws_resource(self, "iam", graph)
    instance_profile = iam.InstanceProfile(self.name)

    for role in self.predecessors(graph, edge_type=EdgeType.delete):
        if not isinstance(role, AWSIAMRole):
            continue
        log_msg = f"Detaching {role.rtdname}"
        self.log(log_msg)
        log.debug(f"{log_msg} for deletion of {self.rtdname}")
        instance_profile.remove_role(RoleName=role.name)
    return True
def update_age(self) -> None:
    """Parse the configured minimum age and store it in `self.age`.

    Raises:
        ValueError: Re-raised after logging if the configured minimum age
            cannot be parsed.
    """
    try:
        configured_min_age = Config.plugin_cleanup_aws_loadbalancers.min_age
        self.age = parse_delta(configured_min_age)
        log.debug(f"Cleanup AWS Load balancers minimum age is {self.age}")
    except ValueError:
        log.error(
            "Error while parsing Cleanup AWS Load balancers minimum age"
            f" {Config.plugin_cleanup_aws_loadbalancers.min_age}"
        )
        raise
def add_plugin(self, plugin) -> bool:
    """Add a plugin class to the module-level `plugins` registry.

    `plugin` is registered under its `plugin_type` only if it is a
    non-abstract class derived from `BasePlugin` or `BaseActionPlugin` whose
    plugin type is a key of the registry and which is not registered yet.

    Returns:
        Always True.
    """
    global plugins

    is_usable_plugin = (
        inspect.isclass(plugin)
        and not inspect.isabstract(plugin)
        and issubclass(plugin, (BasePlugin, BaseActionPlugin))
        and plugin.plugin_type in plugins
    )
    if is_usable_plugin:
        log.debug(f"Found plugin {plugin} ({plugin.plugin_type.name})")
        plugins_of_type = plugins[plugin.plugin_type]
        if plugin not in plugins_of_type:
            plugins_of_type.append(plugin)
    return True
def remove_event_listener(event_type: EventType, listener: Callable) -> bool:
    """Unregister `listener` from events of type `event_type`.

    The registry entry of the event type is dropped once its last listener
    is removed.

    Returns:
        True if the listener was registered and has been removed, False
        otherwise.
    """
    with _events_lock.write_access:
        if not event_listener_registered(event_type, listener):
            return False

        log.debug(f"Removing {listener} from event {event_type.name}")
        del _events[event_type][listener]
        if not _events[event_type]:
            del _events[event_type]
        return True