def on_get(self, req, resp):
    """
    Handles GET requests for Hosts.

    :param req: Request instance that will be passed through.
    :type req: falcon.Request
    :param resp: Response instance that will be passed through.
    :type resp: falcon.Response
    """
    try:
        hosts_dir = self.store.get('/commissaire/hosts/')
        self.logger.debug('Etcd Response: {0}'.format(hosts_dir))
    except etcd.EtcdKeyNotFound:
        self.logger.warn(
            'Etcd does not have any hosts. Returning [] and 404.')
        resp.status = falcon.HTTP_404
        req.context['model'] = None
        return

    # Guard against an empty hosts directory: a childless etcd
    # directory node must not be treated as containing hosts.
    if not hosts_dir._children:
        self.logger.debug(
            'Etcd has a hosts directory but no content.')
        resp.status = falcon.HTTP_200
        req.context['model'] = None
        return

    # Deserialize each stored host record into a Host model.
    found_hosts = [
        Host(**json.loads(leaf.value)) for leaf in hosts_dir.leaves]
    resp.status = falcon.HTTP_200
    req.context['model'] = Hosts(hosts=found_hosts)
def _calculate_hosts(self, cluster):
    """
    Calculates the hosts metadata for the cluster and records the
    totals on the cluster instance (mutates ``cluster.hosts`` in place).

    :param cluster: The cluster whose host stats should be updated.
                    NOTE(review): despite the original docstring, the code
                    reads ``cluster.hostset`` and writes ``cluster.hosts``,
                    so this is a Cluster model instance, not a name string.
    :type cluster: commissaire Cluster model -- TODO confirm
    """
    # XXX: Not sure which wil be more efficient: fetch all
    # the host data in one etcd call and sort through
    # them, or fetch the ones we need individually.
    # For the MVP phase, fetch all is better.
    try:
        store_manager = cherrypy.engine.publish('get-store-manager')[0]
        hosts = store_manager.list(Hosts(hosts=[]))
    except Exception:
        # Narrowed from a bare ``except:`` so system-exiting signals
        # (KeyboardInterrupt, SystemExit) are not silently swallowed.
        self.logger.warn(
            'Store does not have any hosts. '
            'Cannot determine cluster stats.')
        return

    available = unavailable = total = 0
    for host in hosts.hosts:
        # Only count hosts that belong to this cluster.
        if host.address in cluster.hostset:
            total += 1
            if host.status == 'active':
                available += 1
            else:
                unavailable += 1

    cluster.hosts['total'] = total
    cluster.hosts['available'] = available
    cluster.hosts['unavailable'] = unavailable
def on_get(self, req, resp):
    """
    Handles GET requests for Hosts.

    Responds 200 with the host list, or 404 when the store is
    unreachable or holds no hosts (both map to the same response).

    :param req: Request instance that will be passed through.
    :type req: falcon.Request
    :param resp: Response instance that will be passed through.
    :type resp: falcon.Response
    """
    hosts = None
    try:
        store_manager = cherrypy.engine.publish('get-store-manager')[0]
        hosts = store_manager.list(Hosts(hosts=[]))
    except Exception:
        # A store failure falls through to the 404 path below.
        # (The original raised a bare Exception() to signal "empty",
        # using exceptions for control flow; this keeps the same
        # observable responses without the synthetic raise.)
        pass

    if hosts is None or len(hosts.hosts) == 0:
        # This was originally a "no content" but I think a 404 makes
        # more sense if there are no hosts
        self.logger.warn(
            'Store does not have any hosts. Returning [] and 404.')
        resp.status = falcon.HTTP_404
        req.context['model'] = None
        return

    resp.status = falcon.HTTP_200
    req.context['model'] = hosts
def test_hosts_listing_with_no_hosts(self):
    """
    Verify listing Hosts when no hosts exists.
    """
    with mock.patch('cherrypy.engine.publish') as publish_mock:
        # NOTE(review): the handler indexes publish(...)[0]; confirm this
        # mocked return shape still exercises the intended code path.
        publish_mock.return_value = Hosts(hosts=[])

        response_body = self.simulate_request('/api/v0/hosts')
        # With no hosts available the endpoint must respond 404 with an
        # empty JSON object as the body.
        self.assertEqual(falcon.HTTP_404, self.srmock.status)
        self.assertEqual({}, json.loads(response_body[0]))
def watcher(queue, store_manager, run_once=False):
    """
    Attempts to connect and check hosts for status.

    :param queue: Queue to pull work from.
    :type queue: Queue.Queue
    :param store_manager: Proxy object for remtote stores
    :type store_manager: commissaire.store.StoreHandlerManager
    :param run_once: If only one run should occur.
    :type run_once: bool
    """
    logger = logging.getLogger('watcher')
    logger.info('Watcher started')
    # TODO: should be configurable
    delta = datetime.timedelta(seconds=20)
    # TODO: should be configurable
    throttle = 60  # 1 minute

    # If the queue is empty attempt to populated it with known hosts
    if queue.qsize() == 0:
        logger.info('The WATCHER_QUEUE is empty. '
                    'Attempting to populate it from the store.')
        try:
            hosts = store_manager.list(Hosts(hosts=[]))
            for host in hosts.hosts:
                last_check = datetime.datetime.strptime(
                    host.last_check, "%Y-%m-%dT%H:%M:%S.%f")
                queue.put_nowait((host, last_check))
                logger.debug('Inserted {0} into WATCHER_QUEUE'.format(
                    host.address))
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt /
            # SystemExit can still stop the daemon during startup.
            logger.info('No hosts found in the store.')

    while True:
        try:
            host, last_run = queue.get_nowait()
        except Empty:
            time.sleep(throttle)
            continue

        logger.debug('Retrieved {0} from queue. Last check was {1}'.format(
            host.address, last_run))
        now = datetime.datetime.utcnow()
        if last_run > now - delta:
            logger.debug('{0} not ready to check. {1}'.format(
                host.address, last_run))
            # Requeue the host with the same last_run
            queue.put_nowait((host, last_run))
        else:
            logger.info('Checking {0} for availability'.format(
                host.address))
            transport = ansibleapi.Transport(host.remote_user)
            with TemporarySSHKey(host, logger) as key:
                results = transport.check_host_availability(host, key.path)
            host.last_check = now.isoformat()

            if results[0] == 0:
                # This means the host is available
                # Only flip the bit on failed only
                if host.status == 'failed':
                    try:
                        cluster_type = util.cluster_for_host(
                            host.address, store_manager).type
                    except Exception:
                        logger.debug(
                            '{0} has no cluster type. Assuming {1}'.format(
                                host.address, C.CLUSTER_TYPE_HOST))
                        cluster_type = C.CLUSTER_TYPE_HOST
                    # If the type is CLUSTER_TYPE_HOST then it should be
                    if cluster_type == C.CLUSTER_TYPE_HOST:
                        host.status = 'disassociated'
                    else:
                        host.status = 'active'
            else:
                # If we can not access the host at all throw it to failed
                host.status = 'failed'
                # NOTE(review): a redundant second
                # ``host.last_check = now.isoformat()`` assignment was
                # removed here; it duplicated the one above verbatim.

            host = store_manager.save(host)
            # Requeue the host
            queue.put_nowait((host, now))
            logger.debug('{0} has been requeued for next check run'.format(
                host.address))

        if run_once:
            logger.info('Exiting watcher due to run_once request.')
            break

        logger.debug('Sleeping for {0} seconds.'.format(throttle))
        time.sleep(throttle)

    logger.info('Watcher stopping')
def clusterexec(store_manager, cluster_name, command, kwargs=None):
    """
    Remote executes a shell commands across a cluster.

    :param store_manager: Proxy object for remtote stores
    :type store_manager: commissaire.store.StoreHandlerManager
    :param cluster_name: Name of the cluster to act on
    :type cluster_name: str
    :param command: Top-level command to execute
    :type command: str
    :param kwargs: Keyword arguments for the command
    :type kwargs: dict
    """
    logger = logging.getLogger('clusterexec')

    # Avoid the shared mutable default argument the original used
    # (``kwargs={}``); callers passing a dict see no difference.
    if kwargs is None:
        kwargs = {}

    # TODO: This is a hack and should really be done elsewhere
    command_args = ()
    if command == 'upgrade':
        finished_hosts_key = 'upgraded'
        model_instance = ClusterUpgrade.new(
            name=cluster_name,
            status='in_process',
            started_at=datetime.datetime.utcnow().isoformat(),
            upgraded=[],
            in_process=[],
        )
    elif command == 'restart':
        finished_hosts_key = 'restarted'
        model_instance = ClusterRestart.new(
            name=cluster_name,
            status='in_process',
            started_at=datetime.datetime.utcnow().isoformat(),
            restarted=[],
            in_process=[],
        )
    elif command == 'deploy':
        finished_hosts_key = 'deployed'
        version = kwargs.get('version', '')
        command_args = (version,)
        model_instance = ClusterDeploy.new(
            name=cluster_name,
            status='in_process',
            started_at=datetime.datetime.utcnow().isoformat(),
            version=version,
            deployed=[],
            in_process=[],
        )
    else:
        # Previously an unknown command fell through and raised a
        # confusing NameError on model_instance below; fail fast instead.
        logger.error('Unknown clusterexec command "{0}". Exiting.'.format(
            command))
        return

    end_status = 'finished'

    try:
        # Set the initial status in the store
        logger.info('Setting initial status.')
        logger.debug('Status={0}'.format(model_instance.to_json()))
        store_manager.save(model_instance)
    except Exception as error:
        logger.error(
            'Unable to save initial state for "{0}" clusterexec due to '
            '{1}: {2}'.format(cluster_name, type(error), error))
        return

    # Collect all host addresses in the cluster
    try:
        cluster = store_manager.get(Cluster.new(
            name=cluster_name, status='', hostset=[]))
    except Exception as error:
        logger.warn(
            'Unable to continue for cluster "{0}" due to '
            '{1}: {2}. Returning...'.format(cluster_name, type(error), error))
        return

    if cluster.hostset:
        logger.debug(
            '{0} hosts in cluster "{1}"'.format(
                len(cluster.hostset), cluster_name))
    else:
        logger.warn('No hosts in cluster "{0}"'.format(cluster_name))

    # TODO: Find better way to do this
    try:
        hosts = store_manager.list(Hosts(hosts=[]))
    except Exception as error:
        logger.warn(
            'No hosts in the cluster. Error: {0}. Exiting clusterexec'.format(
                error))
        return

    for host in hosts.hosts:
        if host.address not in cluster.hostset:
            logger.debug(
                'Skipping {0} as it is not in this cluster.'.format(
                    host.address))
            continue  # Move on to the next one

        oscmd = get_oscmd(host.os)

        # command_list is only used for logging
        command_list = getattr(oscmd, command)(*command_args)
        logger.info('Executing {0} on {1}...'.format(
            command_list, host.address))

        model_instance.in_process.append(host.address)
        try:
            store_manager.save(model_instance)
        except Exception as error:
            logger.error(
                'Unable to save in_process state for "{0}" clusterexec due to '
                '{1}: {2}'.format(cluster_name, type(error), error))
            return

        key = TemporarySSHKey(host, logger)
        key.create()

        try:
            transport = ansibleapi.Transport(host.remote_user)
            exe = getattr(transport, command)
            result, facts = exe(
                host.address, key.path, oscmd, kwargs)
        # XXX: ansibleapi explicitly raises Exception()
        except Exception as ex:
            # If there was a failure set the end_status and break out
            end_status = 'failed'
            logger.error('Clusterexec {0} for {1} failed: {2}: {3}'.format(
                command, host.address, type(ex), ex))
            break
        finally:
            try:
                key.remove()
                logger.debug('Removed temporary key file {0}'.format(key.path))
            except Exception:
                # Narrowed from a bare ``except:``; cleanup is best-effort.
                logger.warn(
                    'Unable to remove the temporary key file: {0}'.format(
                        key.path))

        # Set the finished hosts
        new_finished_hosts = getattr(
            model_instance, finished_hosts_key) + [host.address]
        setattr(
            model_instance,
            finished_hosts_key,
            new_finished_hosts)
        try:
            idx = model_instance.in_process.index(host.address)
            model_instance.in_process.pop(idx)
        except ValueError:
            # ``host`` is a model instance, not a dict: the original
            # ``host['address']`` here would itself raise TypeError.
            logger.warn('Host {0} was not in_process for {1} {2}'.format(
                host.address, command, cluster_name))
        try:
            store_manager.save(model_instance)
            logger.info('Finished executing {0} for {1} in {2}'.format(
                command, host.address, cluster_name))
        except Exception as error:
            logger.error(
                'Unable to save cluster state for "{0}" clusterexec due to '
                '{1}: {2}'.format(cluster_name, type(error), error))
            return

    # Final set of command result
    model_instance.finished_at = datetime.datetime.utcnow().isoformat()
    model_instance.status = end_status

    logger.info('Cluster {0} final {1} status: {2}'.format(
        cluster_name, command, model_instance.to_json()))

    try:
        store_manager.save(model_instance)
    except Exception as error:
        logger.error(
            'Unable to save final state for "{0}" clusterexec due to '
            '{1}: {2}'.format(cluster_name, type(error), error))

    logger.info('Clusterexec stopping')