def _walk(self, path, result):
    """
    Recursively gather the application state of every leaf node below
    ``path`` into ``result``, keyed by each state's configuration path.

    A node that disappears mid-walk is recorded as a deleted
    ApplicationState; any other error is logged and swallowed.

    :type path: str
    :type result: zoom.www.messages.application_states.ApplicationStatesMessage
    """
    try:
        child_names = self._zoo_keeper.get_children(path,
                                                    watch=self._on_update)
        if not child_names:
            # leaf node: record its own state
            state = self._get_application_state(path)
            result.update({state.configuration_path: state.to_dictionary()})
            return

        for child_name in child_names:
            self._walk(zk_path_join(path, child_name), result)

    except NoNodeError:
        # the node vanished: report it as deleted
        deleted = ApplicationState(configuration_path=path, delete=True)
        result.update({path: deleted.to_dictionary(), })

    except Exception:
        logging.exception('An unhandled Exception has occurred while '
                          'running ApplicationStateCache.walk.')
def post(self, server):
    """
    @api {post} /api/v1/config/:host Create sentinel config
    @apiParam {String} XML A string containing the XML of the Sentinel Config
    @apiVersion 1.0.0
    @apiName CreateSentinel
    @apiGroup Sentinel Config
    """
    # Create the config node for ``server`` from the "XML" request argument.
    # Every outcome (created, already present, parent nodes missing) is
    # logged and also written to the response.
    logging.info('Adding server {0} for client {1}'
                 .format(server, self.request.remote_ip))
    path = zk_path_join(self.agent_configuration_path, server)

    # add server if it does not already exist
    if self.zk.exists(path):
        output = 'Node {0} already exists'.format(server)
        logging.info(output)
        # tell the caller why nothing was created instead of replying
        # with an empty body
        self.write(output)
    else:
        # read the XML payload from the "XML" request argument
        data = self.get_argument("XML")
        logging.info('Received XML configuration for {0}'.format(server))
        try:
            self.zk.create(path, bytes(data))
            self.write('Node successfully added.')
            logging.info('Added {0}'.format(server))
        except NoNodeError:
            output = 'Parent nodes are missing for {0}'.format(path)
            self.write(output)
            logging.info(output)
def put(self, server):
    """
    @api {put} /api/v1/config/:host Create|Update sentinel config
    @apiVersion 1.0.0
    @apiName UpdateSentinel
    @apiGroup Sentinel Config
    """
    # Store the "XML" value of the JSON request body on the config node for
    # ``server``, creating the node first when it is missing. The data is
    # only written when ``_is_valid`` accepts it.
    logging.info('Updating server {0} for client {1}'
                 .format(server, self.request.remote_ip))
    zk_path = zk_path_join(self.agent_configuration_path, server)

    try:
        request = json.loads(self.request.body)
        # get XML data from JSON dictionary
        data = request.get("XML")
        logging.info('Received XML configuration for {0}'.format(server))

        if not self.zk.exists(zk_path):
            self.zk.create(zk_path)

        if not self._is_valid(str(data), server):
            logging.warning('Not updating invalid config for server: {0}'
                            .format(server))
        else:
            self.zk.set(zk_path, str(data))
            self.write('Node successfully updated.')
            logging.info('Updated server {0}'.format(server))

    except NoNodeError:
        # placeholder moved so the message reads "Node <path> does not exist."
        output = 'Node {0} does not exist.'.format(zk_path)
        logging.exception(output)
        self.write(output)
def post(self, server):
    """
    @api {post} /api/v1/config/:host Create sentinel config
    @apiParam {String} XML A string containing the XML of the Sentinel Config
    @apiVersion 1.0.0
    @apiName CreateSentinel
    @apiGroup Sentinel Config
    """
    # Create the config node for ``server`` (normalised by cap_hostname)
    # from the "XML" request argument. Every outcome (created, already
    # present, parent nodes missing) is logged and written to the response.
    server = cap_hostname(server)
    logging.info('Adding server {0} for client {1}'.format(
        server, self.request.remote_ip))
    path = zk_path_join(self.agent_configuration_path, server)

    # add server if it does not already exist
    if self.zk.exists(path):
        output = 'Node {0} already exists'.format(server)
        logging.info(output)
        # tell the caller why nothing was created instead of replying
        # with an empty body
        self.write(output)
    else:
        # read the XML payload from the "XML" request argument
        data = self.get_argument("XML")
        logging.info('Received XML configuration for {0}'.format(server))
        try:
            self.zk.create(path, bytes(data))
            self.write('Node successfully added.')
            logging.info('Added {0}'.format(server))
        except NoNodeError:
            output = 'Parent nodes are missing for {0}'.format(path)
            self.write(output)
            logging.info(output)
def _walk(self, path, result):
    """
    Depth-first walk of ``path``: every leaf node contributes its
    application state to ``result`` under its configuration path.

    A missing node is reported as a deleted ApplicationState; unexpected
    errors are logged and not propagated.

    :type path: str
    :type result: zoom.www.messages.application_states.ApplicationStatesMessage
    """
    try:
        kids = self._zoo_keeper.get_children(path, watch=self._on_update)
        if not kids:
            # no children, so this node itself carries the state
            leaf_state = self._get_application_state(path)
            result.update(
                {leaf_state.configuration_path: leaf_state.to_dictionary()}
            )
            return

        for kid in kids:
            self._walk(zk_path_join(path, kid), result)

    except NoNodeError:
        # node no longer exists: flag it as deleted
        gone = ApplicationState(configuration_path=path, delete=True)
        result.update({path: gone.to_dictionary(), })

    except Exception:
        logging.exception('An unhandled Exception has occurred while '
                          'running ApplicationStateCache.walk.')
def _double_check_config(self, server, id_to_find=None, reg_to_find=None):
    """
    Confirm against the stored agent config that a component is really
    configured on ``server``; the ApplicationStateCache may hold a stale
    host value.

    Returns True when any ``Component`` element has an ``id`` equal to
    ``id_to_find`` or a ``registrationpath`` equal to ``reg_to_find``.
    Returns False when the config node is missing or nothing matches.

    :type server: str
    :type id_to_find: str or None
    :type reg_to_find: str or None
    :rtype: bool
    """
    path = zk_path_join(self.agent_configuration_path, server)
    if not self.zk.exists(path):
        return False

    xmlstr, _ = self.zk.get(path)
    config = ElementTree.fromstring(xmlstr)

    return any(
        (id_to_find and id_to_find == component.get('id')) or
        (reg_to_find and reg_to_find == component.get('registrationpath'))
        for component in config.iter('Component')
    )
def put(self, server):
    """
    @api {put} /api/v1/config/:host Create|Update sentinel config
    @apiVersion 1.0.0
    @apiName UpdateSentinel
    @apiGroup Sentinel Config
    """
    # Store the "XML" value of the JSON request body on the config node for
    # ``server`` (normalised by cap_hostname), creating the node first when
    # it is missing. The data is only written when ``_is_valid`` accepts it.
    server = cap_hostname(server)
    logging.info('Updating server {0} for client {1}'.format(
        server, self.request.remote_ip))
    zk_path = zk_path_join(self.agent_configuration_path, server)

    try:
        request = json.loads(self.request.body)
        # get XML data from JSON dictionary
        data = request.get("XML")
        logging.info('Received XML configuration for {0}'.format(server))

        if not self.zk.exists(zk_path):
            self.zk.create(zk_path)

        if not self._is_valid(str(data), server):
            logging.warning(
                'Not updating invalid config for server: {0}'.format(
                    server))
        else:
            self.zk.set(zk_path, str(data))
            self.write('Node successfully updated.')
            logging.info('Updated server {0}'.format(server))

    except NoNodeError:
        # placeholder moved so the message reads "Node <path> does not exist."
        output = 'Node {0} does not exist.'.format(zk_path)
        logging.exception(output)
        self.write(output)
def _submit_task(self, task):
    """
    If node does not exist in ZK, create node. Set data watch on that node.

    The task node lives at ``<task_path>/<task.host>``. When it already
    exists, ``_on_update`` is invoked directly with a synthetic
    WatchedEvent for that path instead of creating anything.

    :type task: zoom.agent.task.task.Task
    """
    # NOTE(review): the original indentation was lost; the final
    # ``get(..., watch=...)`` is assumed to belong to the ``else`` branch.
    # Confirm against the upstream file.
    try:
        task_path = zk_path_join(self._configuration.task_path, task.host)

        if self._zoo_keeper.exists(task_path):
            # if the node exists, check if it is done
            e = WatchedEvent(None, None, task_path)
            self._on_update(e)

        else:
            logging.info("Creating task node for path {0}: {1}".format(
                task_path, task))
            try:
                self._zoo_keeper.create(task_path, value=task.to_json())
            except NodeExistsError:
                # the node appeared between the exists() check and create()
                pass

            # register a data watch on the task node
            self._zoo_keeper.get(task_path, watch=self._on_update)

    except NoNodeError:
        # a node involved went missing; the submission is silently dropped
        pass
def _submit_task(self, task):
    """
    If node does not exist in ZK, create node. Set data watch on that node.

    The task node lives at ``<task_path>/<task.host>``. When it already
    exists, ``_on_update`` is invoked directly with a synthetic
    WatchedEvent for that path instead of creating anything.

    :type task: zoom.agent.task.task.Task
    """
    # NOTE(review): the original indentation was lost; the final
    # ``get(..., watch=...)`` is assumed to belong to the ``else`` branch.
    # Confirm against the upstream file.
    try:
        task_path = zk_path_join(self._configuration.task_path, task.host)

        if self._zoo_keeper.exists(task_path):
            # if the node exists, check if it is done
            e = WatchedEvent(None, None, task_path)
            self._on_update(e)

        else:
            logging.info("Creating task node for path {0}: {1}"
                         .format(task_path, task))
            try:
                self._zoo_keeper.create(task_path, value=task.to_json())
            except NodeExistsError:
                # the node appeared between the exists() check and create()
                pass

            # register a data watch on the task node
            self._zoo_keeper.get(task_path, watch=self._on_update)

    except NoNodeError:
        # a node involved went missing; the submission is silently dropped
        pass
def _double_check_config(self, server, id_to_find=None, reg_to_find=None):
    """
    Look at the actual agent config for ``server`` to verify a component
    is REALLY there, because the ApplicationStateCache can hold a stale
    host value.

    A component matches when its ``id`` equals ``id_to_find`` or its
    ``registrationpath`` equals ``reg_to_find``. A missing config node
    yields False.

    :type server: str
    :type id_to_find: str or None
    :type reg_to_find: str or None
    :rtype: bool
    """
    path = zk_path_join(self.agent_configuration_path, server)
    if not self.zk.exists(path):
        return False

    xmlstr, _ = self.zk.get(path)

    for component in ElementTree.fromstring(xmlstr).iter('Component'):
        id_hit = id_to_find and id_to_find == component.get('id')
        reg_hit = (reg_to_find and
                   reg_to_find == component.get('registrationpath'))
        if id_hit or reg_hit:
            return True

    return False
def clear_all_tasks(self):
    """
    Empty the in-memory task queue, then delete every child node found
    under the configured task path in Zookeeper.
    """
    self._task_queue.clear()

    for name in self._zoo_keeper.get_children(self._configuration.task_path):
        stale_path = zk_path_join(self._configuration.task_path, name)
        logging.info('Deleting stale task node {0}'.format(stale_path))
        self._zoo_keeper.delete(stale_path)
def clear_all_tasks(self):
    """
    Drop all queued tasks held by the server and remove each task node
    that exists below the configured task path in Zookeeper.
    """
    self._task_queue.clear()

    task_nodes = self._zoo_keeper.get_children(self._configuration.task_path)
    for task_node in task_nodes:
        node_path = zk_path_join(self._configuration.task_path, task_node)
        logging.info('Deleting stale task node {0}'.format(node_path))
        self._zoo_keeper.delete(node_path)
def live_tasks(self):
    """
    Return Tasks submitted to sentinel agents.

    The result maps each child node name under the configured task path
    to the JSON-decoded data stored on that node.

    :rtype: dict
    """
    tasks = {}
    for name in self._zoo_keeper.get_children(self._configuration.task_path):
        node_path = zk_path_join(self._configuration.task_path, name)
        raw, _ = self._zoo_keeper.get(node_path)
        tasks[name] = json.loads(raw)
    return tasks
def live_tasks(self):
    """
    Collect the tasks currently stored in Zookeeper for sentinel agents.

    Each child of the configured task path becomes one entry: the child
    name is the key and the node's JSON-decoded data is the value.

    :rtype: dict
    """
    submitted = dict()
    task_nodes = self._zoo_keeper.get_children(self._configuration.task_path)
    for task_node in task_nodes:
        payload, _ = self._zoo_keeper.get(
            zk_path_join(self._configuration.task_path, task_node))
        submitted[task_node] = json.loads(payload)
    return submitted
def _walk(self, path, result):
    """
    Recursively descend from ``path`` and load the dependencies of every
    leaf node into ``result``. A child watch (``_on_update``) is set on
    each node visited; nodes that vanish mid-walk are only logged.

    :type path: str
    :type result: ApplicationDependenciesMessage
    """
    try:
        child_names = self._zoo_keeper.get_children(path,
                                                    watch=self._on_update)
        if not child_names:
            # leaf node: parse its config for dependencies
            self._get_application_dependency(path, result)
            return

        for child_name in child_names:
            self._walk(zk_path_join(path, child_name), result)

    except NoNodeError:
        logging.debug('Node at {0} no longer exists.'.format(path))
def _get_application_dependency(self, path, result):
    """
    Load result object with application dependencies.

    Reads the XML stored at ``path`` (setting a data watch) and, for each
    ``Automation/Component`` element, adds an entry to ``result`` keyed by
    the component's registration path. The registration path defaults to
    ``<application_state_path>/<id>`` when the attribute is absent.
    Components without a "start" action get an empty dependency list.

    A missing node or empty data adds nothing; parse errors are logged and
    swallowed.

    :type path: str
    :type result: ApplicationDependenciesMessage
    """
    if not self._zoo_keeper.exists(path):
        # logging.warn is a deprecated alias; use logging.warning
        logging.warning("config path does not exist: {0}".format(path))
        return

    data, stat = self._zoo_keeper.get(path, watch=self._on_update)
    if not data:
        return

    try:
        root = ElementTree.fromstring(data)

        for node in root.findall('Automation/Component'):
            app_id = node.attrib.get('id')
            registrationpath = node.attrib.get('registrationpath', None)

            if registrationpath is None:
                registrationpath = zk_path_join(
                    self._configuration.application_state_path, app_id)

            start_action = node.find('Actions/Action[@id="start"]')

            if start_action is None:
                logging.warning("No Start Action Found for {0}".format(
                    registrationpath))
                dependencies = list()
            else:
                dependencies = self._parse_dependencies(start_action)

            # separate name so the raw node payload in ``data`` is not
            # overwritten inside the loop
            entry = {
                "configuration_path": registrationpath,
                "dependencies": dependencies,
                "downstream": list()
            }

            result.update({registrationpath: entry})

    except Exception:
        logging.exception('An unhandled exception occurred')
def _walk(self, node, node_list):
    """
    Recursively walk a ZooKeeper path and append matching paths to
    ``node_list``.

    Only leaf nodes are considered. A non-ephemeral leaf is appended when
    its own path matches ``self.nodepattern``; for an ephemeral leaf the
    parent path is tested and appended instead.

    :type node: str
    """
    children = self.zkclient.get_children(node, watch=self._rewalk_tree)
    if children:
        for child in children:
            self._walk(zk_path_join(node, child), node_list)
        return

    _, stat = self.zkclient.get(node)
    # ephemeralOwner == 0 means the node is not ephemeral
    if stat.ephemeralOwner == 0:
        candidate = node
    else:
        candidate = os.path.dirname(node)

    if fnmatch.fnmatch(candidate, self.nodepattern):
        node_list.append(candidate)
def _walk(self, node, node_list):
    """
    Descend through a ZooKeeper subtree, collecting into ``node_list`` the
    paths that match ``self.nodepattern``.

    Leaves that are not ephemeral are matched by their own path; ephemeral
    leaves are matched (and recorded) by their parent path. A child watch
    calling ``_rewalk_tree`` is set on every node visited.

    :type node: str
    """
    kids = self.zkclient.get_children(node, watch=self._rewalk_tree)
    if kids:
        for kid in kids:
            self._walk(zk_path_join(node, kid), node_list)
        return

    _, stat = self.zkclient.get(node)
    is_persistent = stat.ephemeralOwner == 0  # not ephemeral
    target = node if is_persistent else os.path.dirname(node)

    if fnmatch.fnmatch(target, self.nodepattern):
        node_list.append(target)
def delete(self, server):
    """
    @api {delete} /api/v1/config/:host Delete sentinel config
    @apiVersion 1.0.0
    @apiName DeleteSentinel
    @apiGroup Sentinel Config
    """
    # Remove the config node for ``server``; the outcome is logged and
    # written to the response.
    # The format string now includes {1}, so the client IP passed to
    # format() actually appears in the log line.
    logging.info('Deleting server {0} for client {1}'
                 .format(server, self.request.remote_ip))
    path = zk_path_join(self.agent_configuration_path, server)

    # delete the server node
    # NOTE(review): no recursive flag is passed here; confirm whether
    # children need to be removed as well.
    try:
        self.zk.delete(path)
        self.write('Node successfully deleted.')
        logging.info('Deleted {0}'.format(server))
    except NoNodeError:
        output = 'Node {0} does not exist.'.format(path)
        logging.error(output)
        self.write(output)
def delete(self, server):
    """
    @api {delete} /api/v1/config/:host Delete sentinel config
    @apiVersion 1.0.0
    @apiName DeleteSentinel
    @apiGroup Sentinel Config
    """
    # Remove the config node for ``server`` (normalised by cap_hostname);
    # the outcome is logged and written to the response.
    server = cap_hostname(server)
    # The format string now includes {1}, so the client IP passed to
    # format() actually appears in the log line.
    logging.info('Deleting server {0} for client {1}'.format(
        server, self.request.remote_ip))
    path = zk_path_join(self.agent_configuration_path, server)

    # delete the server node
    # NOTE(review): no recursive flag is passed here; confirm whether
    # children need to be removed as well.
    try:
        self.zk.delete(path)
        self.write('Node successfully deleted.')
        logging.info('Deleted {0}'.format(server))
    except NoNodeError:
        output = 'Node {0} does not exist.'.format(path)
        logging.error(output)
        self.write(output)
def _has_exception(self, key):
    """
    Check the override node to see if pagerduty alerts should be disabled.

    The application id is taken from ``key`` by dropping its first and
    last '/'-separated segments. Returns the ``pd_disabled`` value stored
    for that application's state path, or False when there is no entry,
    the override node is missing, or its JSON is malformed.

    :type key: str
    :rtype: bool
    """
    # TODO: change the key or add a different field so that we don't have to
    # do that messy construction below...
    try:
        segments = key.split('/')
        app_state_path = zk_path_join(self._state_path,
                                      '/'.join(segments[1:-1]))

        raw, _ = self._zk.get(self._override_path)
        overrides = json.loads(raw)

        app_overrides = overrides.get(app_state_path, {})
        return app_overrides.get('pd_disabled', False)

    except ValueError:
        logging.error('Node {0} has malformed JSON.'
                      .format(self._override_path))
        return False
    except NoNodeError:
        return False
def get(self, server):
    """
    @api {get} /api/v1/config/:host Get sentinel config for server
    @apiVersion 1.0.0
    @apiName GetSentinel
    @apiGroup Sentinel Config
    """
    # Write the stored config for ``server`` as a JSON-encoded string, or a
    # plain "does not exist" message when there is no such node.
    logging.info('Searching for server {0}'.format(server))
    path = zk_path_join(self.agent_configuration_path, server)

    if not self.zk.exists(path):
        output = 'Node does not exist.'
        logging.error(output)
        self.write(output)
        return

    # zk.get returns a (value, ZnodeStat) tuple; only the value is used
    config_xml, _ = self.zk.get(path)
    logging.info('Found server {0}. '
                 'Outputting XML configuration.'.format(server))

    # write server data
    self.set_header('Content-Type', 'application/json')
    self.write(json.dumps(config_xml))
def add_task(self, task, is_cancel=False):
    """
    Queue ``task`` on its host's UniqueQueue (created on first use) and
    submit the task node to ZooKeeper.

    When ``is_cancel`` is set, the host's queue is emptied and the host's
    existing task node is deleted first, so only the cancel is submitted.

    :type task: zoom.agent.task.task.Task
    :type is_cancel: bool
    """
    if task.host not in self._task_queue:
        self._task_queue[task.host] = UniqueQueue()
    queue_for_host = self._task_queue.get(task.host)

    if is_cancel:
        queue_for_host.clear()
        stale_path = zk_path_join(self._configuration.task_path, task.host)
        try:
            self._zoo_keeper.delete(stale_path)
        except NoNodeError:
            # nothing was pending for this host
            pass

    queue_for_host.append_unique(task, sender=task.host)
    self._submit_task(task)
def add_task(self, task, is_cancel=False):
    """
    Append ``task`` to the UniqueQueue kept for ``task.host`` and submit
    it to ZooKeeper. A queue is created for hosts seen for the first time.

    A cancel (``is_cancel=True``) first clears that host's queue and
    removes its task node, ignoring the case where no node exists.

    :type task: zoom.agent.task.task.Task
    :type is_cancel: bool
    """
    if task.host not in self._task_queue:
        self._task_queue[task.host] = UniqueQueue()

    pending = self._task_queue.get(task.host)

    if is_cancel:
        pending.clear()
        node_path = zk_path_join(self._configuration.task_path, task.host)
        try:
            self._zoo_keeper.delete(node_path)
        except NoNodeError:
            # no task node to remove
            pass

    pending.append_unique(task, sender=task.host)
    self._submit_task(task)
def get(self, server):
    """
    @api {get} /api/v1/config/:host Get sentinel config for server
    @apiVersion 1.0.0
    @apiName GetSentinel
    @apiGroup Sentinel Config
    """
    # Write the stored config for ``server`` (normalised by cap_hostname) as
    # a JSON-encoded string, or a plain "does not exist" message when there
    # is no such node.
    server = cap_hostname(server)
    logging.info('Searching for server {0}'.format(server))
    path = zk_path_join(self.agent_configuration_path, server)

    if not self.zk.exists(path):
        output = 'Node does not exist.'
        logging.error(output)
        self.write(output)
        return

    # zk.get returns a (value, ZnodeStat) tuple; only the value is used
    config_xml, _ = self.zk.get(path)
    logging.info('Found server {0}. '
                 'Outputting XML configuration.'.format(server))

    # write server data
    self.set_header('Content-Type', 'application/json')
    self.write(json.dumps(config_xml))
def __init__(self, zookeeper, **kwargs):
    """
    Load the JSON document stored at ZOOM_CONFIG and copy its settings
    onto this object.

    Also creates a local "logs" directory when missing and applies the
    "logging" section through ``logging.config.dictConfig``. Keyword
    arguments are kept in ``self._settings``.

    Invalid JSON (ValueError) or any other failure is logged and
    re-raised.

    :type zookeeper: zoom.www.entities.zoo_keeper.ZooKeeper
        (NOTE(review): taken from the original, garbled docstring; confirm)
    """
    self._zookeeper = zookeeper
    self._settings = kwargs

    try:
        data, stat = self._zookeeper.get(ZOOM_CONFIG)
        config = json.loads(data)

        # create 'logs' directory if it does not exist
        if not os.path.exists("logs"):
            os.makedirs("logs")

        # initialize logging
        logging_config = config.get('logging')
        logging.config.dictConfig(logging_config)

        # get system type
        running_os = self._get_system()

        self._host = socket.gethostname()

        # web_server
        web_server_settings = config.get('web_server')
        self._port = self._get_setting('port',
                                       web_server_settings.get('port'))
        self._is_debug = web_server_settings.get('debug')

        self._application_path = os.getcwd()
        self._client_path = zk_path_join(
            (os.path.normpath(os.getcwd() + os.sep + os.pardir)), 'client')
        self._doc_path = zk_path_join(
            (os.path.normpath(os.getcwd() + os.sep + os.pardir)), "doc")
        self._html_path = zk_path_join(self._client_path, "views")
        self._images_path = zk_path_join(self._client_path, "images")
        self._pid = os.getpid()
        self._environment = self._get_setting(
            'environment', os.environ.get('EnvironmentToUse', 'Staging'))

        # zookeeper
        zookeeper_settings = config.get('zookeeper')
        self._agent_configuration_path = zookeeper_settings.get(
            'agent_configuration_path')
        self._agent_state_path = zookeeper_settings.get('agent_state_path')
        self._task_path = zookeeper_settings.get('task_path')
        self._application_state_path = zookeeper_settings.get(
            'application_state_path')
        self._global_mode_path = zookeeper_settings.get('global_mode_path')
        self._pillar_path = zookeeper_settings.get('pillar_path')
        self._alert_path = zookeeper_settings.get('alert_path')
        self._override_node = zookeeper_settings.get(
            'override_node', '/spot/software/config/override')
        self._zookeeper_host = get_zk_conn_string(self._environment)

        # pagerduty
        pagerduty_settings = config.get('pagerduty')
        self._pagerduty_default_svc_key = pagerduty_settings.get(
            'pagerduty_default_svc_key')
        self._pagerduty_api_token = pagerduty_settings.get(
            'pagerduty_api_token')
        self._pagerduty_subdomain = pagerduty_settings.get(
            'pagerduty_subdomain')
        self._pagerduty_enabled_environments = pagerduty_settings.get(
            'pagerduty_enabled_environments')
        self._pagerduty_alert_footer = pagerduty_settings.get(
            'pagerduty_footer', '')

        # database
        db_settings = config.get('database')
        self._db_type = db_settings.get('db_type')
        if running_os == PlatformType.WINDOWS:
            self._sql_connection = db_settings.get(
                'sql_connection_windows')
        else:
            # every non-Windows platform uses the default connection, so
            # _sql_connection is always defined
            self._sql_connection = db_settings.get('sql_connection')

        # authentication
        ad_settings = config.get('active_directory')
        self._ldap_server = ad_settings.get('host')
        self._ldap_port = ad_settings.get('port')

        # environment specific
        env_settings = config.get(self._environment.lower())
        self._read_write_groups = env_settings.get('read_write_groups')
        self._graphite_host = env_settings.get('graphite_host')
        self._graphite_recheck = env_settings.get('graphite_recheck', '5m')

        # chatops
        chatops_settings = env_settings.get('chatops', {})
        self._chatops_url = chatops_settings.get('url')
        self._chatops_group = chatops_settings.get('group')
        self._chatops_commands_to_chat = chatops_settings.get(
            'commands_to_chat')

        # message throttling
        throttle_settings = config.get('message_throttle')
        self._throttle_interval = throttle_settings.get('interval')

        # salt
        self._salt_settings = env_settings.get('saltREST')

    except ValueError:
        logging.error('Data at {0} is not valid JSON.'.format(ZOOM_CONFIG))
        # bare raise keeps the original traceback
        raise
    except Exception:
        logging.exception('An unhandled exception occurred.')
        raise
def _get_application_state(self, path):
    """
    Build the ApplicationState for the node at ``path``.

    A persistent node (``ephemeralOwner == 0``) is reported as STOPPED
    using its own details. An ephemeral node is reported as RUNNING: its
    basename is taken as the host and the details come from its parent
    path.

    :type path: str
    :rtype: zoom.entities.application_state.ApplicationState
    """
    data, stat = self._get_app_details(path)

    # persistent node
    if stat.ephemeralOwner == 0:
        # watch node to see if children are created
        self._zoo_keeper.get_children(path, watch=self._on_update)
        host = data.get('host', 'Unknown')
        name = data.get('name', os.path.basename(path))
        agent_path = zk_path_join(self._configuration.agent_state_path,
                                  host)

        # if the agent is down, update state and mode with unknown
        agent_up = bool(
            self._zoo_keeper.exists(agent_path,
                                    watch=self._on_agent_state_update))

        valid = True
        if host in (None, 'Unknown'):
            # no usable host recorded for this application
            data['state'] = 'invalid'
            data['mode'] = 'unknown'
            valid = False
        elif not agent_up:
            data['state'] = 'unknown'
            data['mode'] = 'unknown'
            valid = False
        elif agent_up:
            # the agent node lists its components; an application missing
            # from that list is marked invalid
            d, s = self._zoo_keeper.get(agent_path)
            registered_comps = json.loads(d).get('components', [])
            if name not in registered_comps:
                data['state'] = 'invalid'
                data['mode'] = 'unknown'
                valid = False

        # NOTE(review): indentation was lost in SOURCE; this call is
        # assumed to run for every persistent node - confirm
        self._update_mapping(host, {path: valid})

        application_state = ApplicationState(
            application_name=name,
            configuration_path=path,
            application_status=ApplicationStatus.STOPPED,
            application_host=host,
            last_update=stat.last_modified,
            start_stop_time=data.get('start_stop_time', ''),
            error_state=data.get('state', 'unknown'),
            local_mode=data.get('mode', 'unknown'),
            login_user=data.get('login_user', 'Zoom'),
            read_only=data.get('read_only', False),
            last_command=self._get_last_command(data),
            pd_disabled=self._get_existing_attribute(path, 'pd_disabled'),
            grayed=self._get_existing_attribute(path, 'grayed'),
            platform=data.get('platform', 'unknown'),
            restart_count=data.get('restart_count', 0),
            load_times=self._time_estimate_cache.get_graphite_data(path))

    # ephemeral node
    else:
        # watch node to see if it goes away
        self._zoo_keeper.get_children(os.path.dirname(path),
                                      watch=self._on_update)

        host = os.path.basename(path)
        # if it is running, path = /app/path/HOSTNAME
        # need to convert to /app/path to get the app_details
        config_path = os.path.dirname(path)
        parent_data, parent_stat = self._get_app_details(config_path)

        self._update_mapping(host, {config_path: True})

        application_state = ApplicationState(
            application_name=parent_data.get(
                'name', os.path.basename(config_path)),
            configuration_path=config_path,
            application_status=ApplicationStatus.RUNNING,
            application_host=host,
            last_update=stat.last_modified,
            start_stop_time=parent_data.get('start_stop_time', ''),
            error_state=parent_data.get('state', 'unknown'),
            local_mode=parent_data.get('mode', 'unknown'),
            login_user=parent_data.get('login_user', 'Zoom'),
            read_only=parent_data.get('read_only', False),
            last_command=self._get_last_command(parent_data),
            pd_disabled=self._get_existing_attribute(
                config_path, 'pd_disabled'),
            grayed=self._get_existing_attribute(config_path, 'grayed'),
            platform=parent_data.get('platform', 'unknown'),
            restart_count=parent_data.get('restart_count', 0),
            load_times=self._time_estimate_cache.get_graphite_data(
                config_path))

    return application_state
def _assemble_path(self, minion):
    """
    Join ``self.pillar_path`` and ``minion`` with ``zk_path_join`` and
    return the resulting path.
    """
    minion_path = zk_path_join(self.pillar_path, minion)
    return minion_path
def _handle_alerts(self, event=None):
    """
    Process every alert node currently under ``self._path`` and re-arm
    the child watch so this method is called again on the next change.

    Each alert's JSON payload names an action. A trigger starts a thread
    running ``self._pd.trigger`` unless ``_has_exception`` says to ignore
    that incident key; a resolve starts a thread running
    ``self._pd.resolve``. Alerts that were triggered, resolved or ignored
    are deleted. Nodes with an unknown action, invalid JSON, or that have
    already vanished are skipped.

    :type event: kazoo.protocol.states.WatchedEvent or None
    """
    def _spawn(target, incident_key, args):
        # run the PagerDuty call on a daemon thread and keep track of it
        worker = Thread(target=target,
                        name='pd_{0}'.format(incident_key),
                        args=args)
        worker.daemon = True
        worker.start()
        self._threads.append(worker)

    # TODO: sort by ctime? Could there be a race condition here?
    self._clean_up_threads()
    try:
        alerts = self._zk.get_children(self._path,
                                       watch=self._handle_alerts)
    except (SessionExpiredError, ConnectionClosedError):
        logging.info('Session with ZK has expired. Will not process alerts '
                     'until reconnect.')
        return

    for alert in alerts:
        path = zk_path_join(self._path, alert)
        try:
            raw, _ = self._zk.get(path)
            alert_data = json.loads(raw)

            action = alert_data.get('action')
            i_key = alert_data.get('incident_key')

            if action == AlertActionType.TRIGGER:
                if self._has_exception(i_key):
                    logging.info('Ignoring alert for {0}'.format(i_key))
                else:
                    _spawn(self._pd.trigger, i_key,
                           (alert_data.get('service_key'),
                            i_key,
                            alert_data.get('description'),
                            alert_data.get('details')))

            elif action == AlertActionType.RESOLVE:
                _spawn(self._pd.resolve, i_key,
                       (alert_data.get('service_key'), i_key))

            else:
                # leave nodes with an unrecognised action in place
                logging.warning('Unknown action type: {0}'.format(action))
                continue

            self._zk.delete(path)

        except NoNodeError:
            logging.info('No node at {0}. Skipping alert.'.format(path))
        except ValueError:
            logging.warning('Node at {0} has invalid JSON.'.format(path))
def _assemble_path(self, minion):
    """
    Return the path produced by ``zk_path_join`` for ``minion`` beneath
    ``self.pillar_path``.
    """
    base = self.pillar_path
    return zk_path_join(base, minion)
def _get_application_state(self, path):
    """
    Build the ApplicationState for the node at ``path``.

    A persistent node (``ephemeralOwner == 0``) is reported as STOPPED
    using its own details. An ephemeral node is reported as RUNNING: its
    basename is taken as the host and the details come from its parent
    path.

    :type path: str
    :rtype: zoom.entities.application_state.ApplicationState
    """
    data, stat = self._get_app_details(path)

    # persistent node
    if stat.ephemeralOwner == 0:
        # watch node to see if children are created
        self._zoo_keeper.get_children(path, watch=self._on_update)
        host = data.get('host', 'Unknown')
        name = data.get('name', os.path.basename(path))
        agent_path = zk_path_join(self._configuration.agent_state_path,
                                  host)

        # if the agent is down, update state and mode with unknown
        agent_up = bool(self._zoo_keeper.exists(
            agent_path, watch=self._on_agent_state_update))

        valid = True
        if host in (None, 'Unknown'):
            # no usable host recorded for this application
            data['state'] = 'invalid'
            data['mode'] = 'unknown'
            valid = False
        elif not agent_up:
            data['state'] = 'unknown'
            data['mode'] = 'unknown'
            valid = False
        elif agent_up:
            # the agent node lists its components; an application missing
            # from that list is marked invalid
            d, s = self._zoo_keeper.get(agent_path)
            registered_comps = json.loads(d).get('components', [])
            if name not in registered_comps:
                data['state'] = 'invalid'
                data['mode'] = 'unknown'
                valid = False

        # NOTE(review): indentation was lost in SOURCE; this call is
        # assumed to run for every persistent node - confirm
        self._update_mapping(host, {path: valid})

        application_state = ApplicationState(
            application_name=name,
            configuration_path=path,
            application_status=ApplicationStatus.STOPPED,
            application_host=host,
            last_update=stat.last_modified,
            start_stop_time=data.get('start_stop_time', ''),
            error_state=data.get('state', 'unknown'),
            local_mode=data.get('mode', 'unknown'),
            login_user=data.get('login_user', 'Zoom'),
            read_only=data.get('read_only', False),
            last_command=self._get_last_command(data),
            pd_disabled=self._get_existing_attribute(path, 'pd_disabled'),
            grayed=self._get_existing_attribute(path, 'grayed'),
            platform=data.get('platform', 'unknown'),
            restart_count=data.get('restart_count', 0),
            load_times=self._time_estimate_cache.get_graphite_data(path)
        )

    # ephemeral node
    else:
        # watch node to see if it goes away
        self._zoo_keeper.get_children(os.path.dirname(path),
                                      watch=self._on_update)

        host = os.path.basename(path)
        # if it is running, path = /app/path/HOSTNAME
        # need to convert to /app/path to get the app_details
        config_path = os.path.dirname(path)
        parent_data, parent_stat = self._get_app_details(config_path)

        self._update_mapping(host, {config_path: True})

        application_state = ApplicationState(
            application_name=parent_data.get('name',
                                             os.path.basename(config_path)),
            configuration_path=config_path,
            application_status=ApplicationStatus.RUNNING,
            application_host=host,
            last_update=stat.last_modified,
            start_stop_time=parent_data.get('start_stop_time', ''),
            error_state=parent_data.get('state', 'unknown'),
            local_mode=parent_data.get('mode', 'unknown'),
            login_user=parent_data.get('login_user', 'Zoom'),
            read_only=parent_data.get('read_only', False),
            last_command=self._get_last_command(parent_data),
            pd_disabled=self._get_existing_attribute(config_path,
                                                     'pd_disabled'),
            grayed=self._get_existing_attribute(config_path, 'grayed'),
            platform=parent_data.get('platform', 'unknown'),
            restart_count=parent_data.get('restart_count', 0),
            load_times=self._time_estimate_cache.get_graphite_data(
                config_path)
        )

    return application_state
def __init__(self, zookeeper, **kwargs):
    """
    Load the JSON document stored at ZOOM_CONFIG and copy its settings
    onto this object.

    Also creates a local "logs" directory when missing and applies the
    "logging" section through ``logging.config.dictConfig``. Keyword
    arguments are kept in ``self._settings``.

    Invalid JSON (ValueError), a missing config node (NoNodeError) or any
    other failure is logged and re-raised.

    :type zookeeper: kazoo.client.KazooClient
    """
    self._zookeeper = zookeeper
    self._settings = kwargs

    try:
        data, stat = self._zookeeper.get(ZOOM_CONFIG)
        config = json.loads(data)

        # create 'logs' directory if it does not exist
        if not os.path.exists("logs"):
            os.makedirs("logs")

        # initialize logging
        logging_config = config.get('logging')
        logging.config.dictConfig(logging_config)

        # get system type
        running_os = self._get_system()

        self._host = socket.gethostname()

        # web_server
        web_server_settings = config.get('web_server')
        self._port = self._get_setting('port',
                                       web_server_settings.get('port'))
        self._is_debug = web_server_settings.get('debug')

        self._application_path = os.getcwd()
        self._client_path = zk_path_join(
            (os.path.normpath(os.getcwd() + os.sep + os.pardir)), 'client')
        self._doc_path = zk_path_join(
            (os.path.normpath(os.getcwd() + os.sep + os.pardir)), "doc")
        self._html_path = zk_path_join(self._client_path, "views")
        self._images_path = zk_path_join(self._client_path, "images")
        self._pid = os.getpid()
        self._environment = self._get_setting(
            'environment', os.environ.get('EnvironmentToUse', 'Staging'))

        # zookeeper
        zookeeper_settings = config.get('zookeeper')
        self._zookeeper_paths = zookeeper_settings
        self._agent_configuration_path = zookeeper_settings.get(
            'agent_configuration_path')
        self._agent_state_path = zookeeper_settings.get('agent_state_path')
        self._task_path = zookeeper_settings.get('task_path')
        self._application_state_path = zookeeper_settings.get(
            'application_state_path')
        self._global_mode_path = zookeeper_settings.get('global_mode_path')
        self._pillar_path = zookeeper_settings.get('pillar_path')
        self._alert_path = zookeeper_settings.get('alert_path')
        self._override_node = zookeeper_settings.get(
            'override_node', '/spot/software/config/override')
        self._temp_directory = zookeeper_settings.get('temp_directory')
        self._zookeeper_host = get_zk_conn_string(self._environment)

        # pagerduty
        pagerduty_settings = config.get('pagerduty')
        self._pagerduty_default_svc_key = pagerduty_settings.get(
            'pagerduty_default_svc_key')
        self._pagerduty_api_token = pagerduty_settings.get(
            'pagerduty_api_token')
        self._pagerduty_subdomain = pagerduty_settings.get(
            'pagerduty_subdomain')
        self._pagerduty_enabled_environments = pagerduty_settings.get(
            'pagerduty_enabled_environments')
        self._pagerduty_alert_footer = pagerduty_settings.get(
            'pagerduty_footer', '')

        # database
        db_settings = config.get('database')
        self._db_type = db_settings.get('db_type')
        if running_os == PlatformType.WINDOWS:
            self._sql_connection = db_settings.get(
                'sql_connection_windows')
        else:
            self._sql_connection = db_settings.get('sql_connection')

        # authentication
        ad_settings = config.get('active_directory')
        self._ldap_server = ad_settings.get('host')
        self._ldap_port = ad_settings.get('port')

        # environment specific
        env_settings = config.get(self._environment.lower())
        self._read_write_groups = env_settings.get('read_write_groups')
        self._graphite_host = env_settings.get('graphite_host')
        self._graphite_recheck = env_settings.get('graphite_recheck', '5m')

        # chatops
        chatops_settings = env_settings.get('chatops', {})
        self._chatops_url = chatops_settings.get('url')
        self._chatops_group = chatops_settings.get('group')
        self._chatops_commands_to_chat = chatops_settings.get(
            'commands_to_chat')

        # message throttling
        throttle_settings = config.get('message_throttle')
        self._throttle_interval = throttle_settings.get('interval')

        # salt
        self._salt_settings = env_settings.get('saltREST')

    except ValueError:
        logging.error('Data at {0} is not valid JSON.'.format(ZOOM_CONFIG))
        # bare raise keeps the original traceback
        raise
    except NoNodeError:
        logging.error('Config node missing: {}'.format(ZOOM_CONFIG))
        raise
    except Exception:
        logging.exception('An unhandled exception occurred.')
        raise