def handler(_):
    '''
    Handler for when this node's mapping has been updated.

    Refreshes the node, deletes local instances that are no longer in the
    node's app set, then creates or updates an instance for every app that
    is. The single argument (the notification payload) is ignored. Any
    exception is logged rather than propagated.
    '''
    try:
        with self.lock:
            self.node.refresh(self.r)
            node_id = self.node.node_id
            running = self.redmanager.list_instances()

            # Drop instances this node is no longer responsible for
            for stale_id in running:
                if stale_id in self.node.apps:
                    continue
                self.redmanager.delete_instance(stale_id)
                stale_app = schema.App(app_id=stale_id)
                stale_app.clear_node_status(node_id, self.rmaster)

            # Create missing instances, update the ones already present
            for app_id in self.node.apps:
                app = schema.App(app_id=app_id)
                app.refresh(self.r)
                if app_id in running:
                    if not self.update_app(app):
                        self.logger.error('Failed to update! node:%s, app:%s, revision:%s', node_id, app.app_id, app.revision)
                        continue
                    app.set_node_revision(node_id, app.revision, self.rmaster)
                    self.logger.debug('Updated app! node:%s, app:%s, revision:%s', node_id, app.app_id, app.revision)
                elif self.new_app(app):
                    # just set it to 'run' optimistically. It will be corrected if necessary very fast
                    app.set_node_status(node_id, self.rmaster, revision=app.revision, state='run')
                    self.logger.debug('Created app:%s, revision:%s', app.app_id, app.revision)
                else:
                    self.logger.error('Failed to create! node:%s, app:%s, revision:%s', node_id, app.app_id, app.revision)
        # NOTE(review): nesting was reconstructed from a flattened source line;
        # confirm update_subs() is meant to run after the lock is released.
        self.update_subs()
    except Exception as exc:
        self.logger.exception(exc)
def check_clusters_vs_node_apps(testlogger, r):
    '''
    Check that app clusters and node app-sets agree with each other.

    For every app, each node in its cluster must list the app in the redis
    set at schema.NODE_APPS_TMPL; for every node, each app in its app-set
    must have the node as a key of the hash at schema.APP_CLUSTER_TMPL.
    Every mismatch is logged via testlogger.error.

    Returns True when no mismatch was found, False otherwise.
    '''
    consistent = True
    m = schema.Monaco()
    m.refresh(r)

    # Direction 1: cluster membership must be mirrored in the node's app-set
    for app_id in m.app_ids:
        app = schema.App(app_id=app_id)
        app.refresh(r)
        for node_id in app.nodes:
            if app_id in r.smembers(schema.NODE_APPS_TMPL % node_id):
                continue
            testlogger.error(
                "App %s has node %s in it's cluster, but the node doesn't have it in it's app-set",
                app_id, node_id)
            consistent = False

    # Direction 2: the node's app-set must be mirrored in the app's cluster
    for node_id in m.node_ids:
        node = schema.MonacoNode(node_id=node_id)
        node.refresh(r)
        for app_id in node.apps:
            if node_id in r.hkeys(schema.APP_CLUSTER_TMPL % app_id):
                continue
            testlogger.error(
                "Node %s has app %s in its app-set, but the corresponding app doesn't have the node in it's cluster",
                node_id, app_id)
            consistent = False

    return consistent
def app_redis_api(app_id):
    '''
    Simple REST api to redis DBs.
    GET, PUT, DELETE are key operations, and POST allows for any command

    method = 'GET': ../redis?key=key
        return r.get('key') (404 if the key does not exist)
    method = 'PUT': ../redis?key=key&val=val
        return r.set('key', 'val')
    method = 'DELETE': ../redis?key=key
        return r.delete('key')
    method = 'POST': ../redis?cmd=hset&args=key,hashkey,hashval
        Request args
        return getattr(r,cmd)(*args.split(','))
        aka r.hset(key, hashkey, hashval)

    Aborts with 404 for an unknown app_id, 400 for missing/invalid query
    arguments or an unsupported method, and 500 when the app has no node
    with the 'master' role. Non-string redis results are converted with
    str() so they can be sent as a response body.
    '''
    def reply(result):
        ''' Turn a redis-py return value into a body Flask can send '''
        # bool/int/list/None are not valid view return values
        if isinstance(result, (bytes, type(u''))):
            return result
        return str(result)

    r = rediscli()
    monaco = schema.Monaco()
    monaco.refresh(r)
    app_id = str(app_id)
    if app_id not in monaco.app_ids:
        abort(404)
    userapp = schema.App(app_id=app_id)
    userapp.refresh(r)

    master_host = None
    for node_id, role in userapp.nodes.iteritems():
        if role == 'master':
            master_host = monaco.hostnames_by_node_id[node_id]
            break
    if master_host is None:
        # Explicit check instead of assert: asserts are stripped under -O,
        # and StrictRedis(None, port) would then be built with no host.
        abort(500)
    r = StrictRedis(master_host, userapp.port)

    params = request.args
    if request.method == 'GET':
        if 'key' not in params:
            abort(400)
        value = r.get(params['key'])
        if value is None:
            abort(404)
        return reply(value)
    if request.method == 'PUT':
        if 'key' not in params or 'val' not in params:
            abort(400)
        return reply(r.set(params['key'], params['val']))
    if request.method == 'DELETE':
        if 'key' not in params:
            abort(400)
        return reply(r.delete(params['key']))
    if request.method == 'POST':
        cmd = params.get('cmd')
        # SECURITY: this endpoint deliberately runs arbitrary client methods
        # chosen by the caller. Private/dunder attributes and non-callables
        # are rejected; everything else is still reachable by design.
        if not cmd or cmd.startswith('_'):
            abort(400)
        command = getattr(r, cmd, None)
        if not callable(command):
            abort(400)
        cmd_args = params['args'].split(',') if 'args' in params else []
        return reply(command(*cmd_args))
    abort(400)
def proxy_stats(twem_id):
    '''
    Returns live aggregates for a given proxy.

    Sums 'instantaneous_ops_per_sec' and 'connected_clients' from redis INFO.
    When the proxy has exactly one server, every node of that app is
    queried; otherwise only the master connection of each server app is.
    Responds with JSON {'total_rps', 'total_connections'}; aborts with 404
    when twem_id is unknown.
    '''
    r = rediscli()
    monaco = schema.Monaco()
    monaco.refresh(r)
    if str(twem_id) not in monaco.twem_ids:
        abort(404)
    twem = schema.MonacoTwem(twem_id=twem_id)
    twem.refresh(r)

    totals = {'total_rps': 0, 'total_connections': 0}

    def tally(client):
        ''' add one redis instance's INFO counters to the running totals '''
        info = client.info()
        totals['total_rps'] += info.get('instantaneous_ops_per_sec', 0)
        totals['total_connections'] += info.get('connected_clients', 0)

    if len(twem.servers) == 1:
        # single backend DB: count the master and every replica node
        dbapp = schema.App(app_id=twem.servers[0])
        dbapp.refresh(r)
        for node_id in dbapp.nodes:
            tally(StrictRedis(monaco.hostnames_by_node_id[node_id], dbapp.port))
    else:
        # several backend DBs: count each one's master only
        for app_id in twem.servers:
            dbapp = schema.App(app_id=app_id)
            dbapp.refresh(r)
            tally(dbapp.get_master_connection(r))

    return jsonify(totals)
def app_stats(app_id):
    '''
    Returns the redis INFO of the app's master connection in json form.
    Any failure along the way is reported as a plain HTTP 500.
    '''
    try:
        mgmt = rediscli()
        dbapp = schema.App(app_id=app_id)
        dbapp.refresh(mgmt)
        master = dbapp.get_master_connection(mgmt)
        return jsonify(master.info())
    except Exception:
        # spare my email from the hounds of AJAX
        abort(500)
def app_view(app_id):
    '''
    Web UI for an App.

    Builds the template context for 'db.html': the master endpoint (a single
    dict under 'master'), a list of endpoints for every other role, the
    app's settings, and the four '&target=...' metric strings built from
    app.config['ENV'] / app.config['LOCATION']. Aborts with 404 when app_id
    is unknown.
    '''
    r = rediscli()
    monaco = schema.Monaco()
    monaco.refresh(r)
    if str(app_id) not in monaco.app_ids:
        abort(404)
    dbapp = schema.App(app_id=app_id)
    dbapp.refresh(r)

    data = {}
    for node_id, role in dbapp.nodes.iteritems():
        node = schema.MonacoNode(node_id=node_id)
        node.refresh(r)
        endpoint = {'host': node.hostname, 'port': dbapp.port}
        if role == 'master':
            data[role] = endpoint
        else:
            data.setdefault(role, []).append(endpoint)

    data.update({
        'app_id': app_id,
        'name': dbapp.name,
        # maxmemory is handed to the template as stored (no mb/gb scaling here)
        'maxmemory': dbapp.maxmemory,
        'maxmemory_policy': dbapp.maxmemory_policy,
        # 'persist' / 'slavelb' are compared against the string 'True'
        'persist': dbapp.persist == 'True',
        'replicas': dbapp.replicas,
        'slavelb': dbapp.slavelb == 'True',
        'owner': dbapp.owner,
        'operator': dbapp.operator,
    })

    env = app.config['ENV']
    location = app.config['LOCATION']
    metric_targets = (
        ('memory_target', 'used_memory'),
        ('rps_target', 'instantaneous_ops_per_sec'),
        ('conn_target', 'connected_clients'),
        ('cpu_target', 'cpu_percent'),
    )
    for key, metric in metric_targets:
        data[key] = '&target=monaco.%s.%s.%s.%s' % (env, location, app_id, metric)

    return render_template('db.html', **data)
def __init__(self, app_id, node_id, threshold=3):
    '''
    Prepare the thread for one app on one node.

    Connects to the management redis on config.config['mgmt_port'], loads
    the app from it and opens the app's master connection.

    app_id    -- id of the app this thread handles
    node_id   -- id of the node this thread runs on
    threshold -- stored as self.threshold; self.failcount starts at 0
                 (presumably the failure limit -- not used in this method)
    '''
    threading.Thread.__init__(self)

    # plain bookkeeping
    self.app_id = app_id
    self.node_id = node_id
    self.threshold = threshold
    self.failcount = 0
    self._run = True
    self.logger = STATLOGGER
    self.interval = config.config['stats']['interval']

    # connections, opened in the same order as before
    self.app = schema.App(app_id=self.app_id)
    self.r = redis.StrictRedis(port=config.config['mgmt_port'])
    self.app.refresh(self.r)
    self.app_conn = self.app.get_master_connection(self.r)
    self.redmgr = redismgmt.RedisMgmt()
def validate_app_invariences(testlogger, r):
    '''
    Ensure that the user specified replica count is maintained.

    Compares int(app.replicas) with the number of nodes in each app's
    cluster and logs an error through testlogger for every app where they
    differ. Returns True only when every app matches.
    '''
    m = schema.Monaco()
    m.refresh(r)
    all_ok = True
    for app_id in m.app_ids:
        app = schema.App(app_id=app_id)
        app.refresh(r)
        if int(app.replicas) == len(app.nodes):
            continue
        testlogger.error("App %s doesn't have the desired replica count", app_id)
        all_ok = False
    return all_ok
def list_apps_by_operator(groups):
    '''
    Lists all apps operated by one of the groups.

    groups -- container of group names; an app is selected when its
              operator is `in groups`

    Returns a list of app_ids. Apps that fail to load or compare are
    skipped (best effort), but the failure is logged at warning level
    instead of being dropped silently.
    '''
    import logging

    r = rediscli()
    monaco = schema.Monaco()
    monaco.refresh(r)
    apps = []
    for app_id in monaco.app_ids:
        try:
            dbapp = schema.App(app_id=app_id)
            dbapp.refresh(r)
            if dbapp.operator in groups:
                apps.append(app_id)
        except Exception:
            # keep listing the remaining apps, but leave a trace of the failure
            logging.getLogger(__name__).warning(
                'Skipping app %s while listing by operator', app_id, exc_info=True)
    return apps
def list_apps_by_owner(owner):
    '''
    List all apps where owner == app.owner.

    owner -- owner value to match exactly against each app's owner

    Returns a list of app_ids. Apps that fail to load are skipped (best
    effort), but the failure is logged at warning level instead of being
    dropped silently.
    '''
    import logging

    r = rediscli()
    monaco = schema.Monaco()
    monaco.refresh(r)
    apps = []
    for app_id in monaco.app_ids:
        try:
            dbapp = schema.App(app_id=app_id)
            dbapp.refresh(r)
            if dbapp.owner == owner:
                apps.append(app_id)
        except Exception:
            # keep listing the remaining apps, but leave a trace of the failure
            logging.getLogger(__name__).warning(
                'Skipping app %s while listing by owner', app_id, exc_info=True)
    return apps
def twem_conf_struct(self, twem, retry=True):
    '''
    Given a schema.MonacoTwem, returns the nutcracker config for that proxy
    in dict form.

    twem  -- a schema.MonacoTwem, or a twem_id from which one is built
    retry -- when True, a redis.RedisError triggers one reconnect to the
             management redis followed by a single retry

    Returns {twem.name: conf}. Returns None when redis still fails after
    the retry (the error is logged).
    '''
    try:
        if not isinstance(twem, schema.MonacoTwem):
            twem = schema.MonacoTwem(twem_id=twem)
        twem.refresh(self.r)

        conf = {}
        for key in schema.MonacoTwem.HASH_KEYS:
            if hasattr(twem, key) and key in self.CONF_KEYS:
                conf[key] = getattr(twem, key)
        conf['listen'] = twem.listen
        conf['auto_eject_hosts'] = twem.auto_eject_hosts
        conf['redis'] = True
        conf['servers'] = []

        if len(twem.servers) == 1:
            # configure to proxy across the master and slaves of a single monaco db, using physical hostnames
            app = schema.App(app_id=twem.servers[0])
            app.refresh(self.r)
            for node_id in app.nodes:
                node = schema.MonacoNode(node_id=node_id)
                node.refresh(self.r)
                conf['servers'].append('%s:%s:1' % (node.FQDN, app.port))
        else:
            # configure to proxy across a set of monaco dbs, using the loadbalanced hostname
            for app_id in twem.servers:
                conf['servers'].append(
                    '%s:%s:1' % (config['loadbalancer']['hostname'], app_id))

        # Allow for external servers that are manually specified
        if twem.extservers:
            for server in twem.extservers:
                conf['servers'].append('%s:1' % server)
        return {twem.name: conf}
    except redis.RedisError as err:
        # reconnect to the management redis before retrying / giving up
        self.r = redis.StrictRedis(port=config['mgmt_port'])
        if retry:
            return self.twem_conf_struct(twem, retry=False)
        self.logger.exception(err)
        return None
def app_health(app_id):
    '''
    Reports the health and replication info of a Monaco DB.

    Loads the app, fetches (or creates and caches in self.app_clients) a
    local redis client with 1s connect/socket timeouts, reads INFO and
    resets self.health_data[app_id] to 0. Returns False when redis times
    out.

    NOTE(review): `offset` is computed but not used in the code visible
    here, and no value is returned on success -- the function may continue
    beyond what is shown; confirm against the full file.
    '''
    # init so DB creation doesn't cause exception
    offset = 0
    try:
        app = schema.App(app_id=app_id)
        app.refresh(self.r)
        if app.app_id not in self.app_clients:
            self.app_clients[app_id] = redis.StrictRedis(
                port=app.port, socket_connect_timeout=1, socket_timeout=1)
        appcli = self.app_clients[app_id]
        info = appcli.info()
        if info['role'] == 'slave':
            offset = info['slave_repl_offset']
        # Reset 'strike-count'
        self.health_data[app_id] = 0
    except redis.TimeoutError:
        # TODO evaluate
        self.logger.warn('App %s had a timeout accessing DB.. doing nothing!', app.app_id)
        return False
def app_config_handler_factory(self, app):
    '''
    Build the callback that reacts to config updates for one app.

    app -- a schema.App, or an app_id from which one is built

    Returns the handler function. The handler takes one (ignored) argument.
    '''
    if not isinstance(app, schema.App):
        app = schema.App(app_id=app)

    def handler(_):
        ''' handler for when users update config settings '''
        try:
            if not app.exists(self.r):
                return  # app deleted, this signal can be ignored
            app.refresh(self.r)
            if self.node.node_id not in app.nodes:
                return  # node handler will delete the app
            with self.lock:
                if self.update_app(app):
                    app.set_node_revision(self.node.node_id, app.revision, self.rmaster)
                    self.logger.debug('Updated revision! node:%s, app:%s, revision:%s', self.node.node_id, app.app_id, app.revision)
                else:
                    self.logger.error('Failed to update! node:%s, app:%s, revision:%s', self.node.node_id, app.app_id, app.revision)
        except Exception as exc:
            self.logger.exception(exc)

    # without this the factory would hand back None instead of the callback
    return handler
def list_app_api():
    '''
    Lists apps, as JSON keyed by app_id.

    Query arguments:
        owner    -- only apps whose owner equals this value
        operator -- only apps operated by one of the given groups (the
                    argument may be repeated)
    With neither argument, all apps from list_apps() are returned.

    Each entry holds the service name, exposure url and memory/client/rps
    figures read from the master's redis INFO. When anything fails for an
    app, its entry falls back to just {'service': <name>}.
    '''
    r = rediscli()
    monaco = schema.Monaco()
    monaco.refresh(r)

    # The filters live in request.args; the request object itself is not
    # subscriptable, and set() of a bare string would split it into chars.
    if 'owner' in request.args:
        apps = list_apps_by_owner(request.args['owner'])
    elif 'operator' in request.args:
        apps = list_apps_by_operator(set(request.args.getlist('operator')))
    else:
        apps = list_apps()

    app_data = {}
    for app_id in apps:
        try:
            dbapp = schema.App(app_id=app_id)
            dbapp.refresh(r)
            masternode = schema.MonacoNode(node_id=dbapp.master)
            masternode.refresh(r)
            mastercli = dbapp.get_master_connection(r)
            info = mastercli.info()
            # memory figures are reported in MiB
            used = float(info['used_memory']) / (1024 * 1024)
            total = int(dbapp.maxmemory) // (1024 * 1024)
            percent = round((100 * used) / total, 2)
            used = round(used, 2)
            app_data[app_id] = {
                'service': dbapp.name,
                'exposure': 'tcp://%s:%s' % (masternode.FQDN, dbapp.port),
                'memory_used': used,
                'memory_total': total,
                'memory_percent': percent,
                'connected_clients': info['connected_clients'],
                'rps': info['instantaneous_ops_per_sec'],
            }
        except Exception:
            # best effort: still list the app, just without live stats
            app_data[app_id] = {'service': monaco.service_by_app_id[app_id]}
    return jsonify(app_data)
def proxy_view(twem_id):
    '''
    Templates the proxy view.

    Builds the context for 'proxy.html': the proxy's settings, per-backend
    memory usage under 'dbinfo' (total/used in MiB plus percent), and the
    list of all apps as choices for servers. Aborts with 404 when twem_id
    is unknown.
    '''
    r = rediscli()
    monaco = schema.Monaco()
    monaco.refresh(r)
    if str(twem_id) not in monaco.twem_ids:
        abort(404)
    twem = schema.MonacoTwem(twem_id=twem_id)
    twem.refresh(r)

    data = {
        'twem_id': twem_id,
        'name': twem.name,
        'servers': twem.servers,
        'extservers': twem.extservers,
        'dbinfo': {},
    }
    for app_id in twem.servers:
        # Get usage info on all backend DBs
        dbapp = schema.App(app_id=app_id)
        dbapp.refresh(r)
        mastercli = dbapp.get_master_connection(r)
        info = mastercli.info()
        used = float(info['used_memory']) / (1024 * 1024)
        total = int(dbapp.maxmemory) // (1024 * 1024)
        # total is 0 when maxmemory is below 1 MiB (or 0); report 0% rather
        # than failing the whole page with a ZeroDivisionError
        percent = round((100 * used) / total, 2) if total else 0.0
        data['dbinfo'][app_id] = {
            'total': total,
            'used': round(used, 2),
            'percent': percent,
        }
    data['distribution'] = twem.distribution
    data['owner'] = twem.owner
    data['operator'] = twem.operator
    # choices for servers
    data['all_servers'] = list(list_apps())
    return render_template('proxy.html', **data)
def app_handler_factory(self, app):
    '''
    Returns a non-class method accessing class objects: the callback for
    cluster changes of one app.

    app -- a schema.App, or an app_id from which one is built

    The returned handler takes one (ignored) argument.
    '''
    if not isinstance(app, schema.App):
        app = schema.App(app_id=app)

    def handler(_):
        ''' handler for when an app this node manages has had its cluster modified '''
        try:
            if not app.exists(self.r):
                return  # already deleted if deleted
            app.refresh(self.r)
            if self.node.node_id not in app.nodes:
                return  # node handler will delete the app
            with self.lock:
                if self.update_app(app):
                    app.set_node_revision(self.node.node_id, app.revision, self.rmaster)
                    self.logger.debug('Updated revision! node:%s, app:%s, revision:%s', self.node.node_id, app.app_id, app.revision)
                else:
                    self.logger.error('Failed to update! node:%s, app:%s, revision:%s', self.node.node_id, app.app_id, app.revision)
        except Exception as exc:
            self.logger.exception(exc)

    # the docstring promises the callback; without this the result is None
    return handler
def app_api(app_id):
    '''
    App API:
    GET:
        200 - gets info about app in json format
        404 - app_id does not exist
        403 - you aint allowed
    HEAD:
        200 - app_id exists
        404 - app_id does not exist
    POST:
        200 - sent update command to master
        400 - app_id does not exist
        403 - you aint allowed
    DELETE:
        200 - sent delete command to master
        404 - app_id does not exist
        403 - you aint allowed

    POST and DELETE do not act directly: they push a job onto the
    MonacoJobQueue and respond with its job id as JSON.
    '''
    r = rediscli()
    job_queue = schema.MonacoJobQueue(r)
    monaco = schema.Monaco()
    monaco.refresh(r)
    app_id = str(app_id)
    method = request.method
    known = app_id in monaco.app_ids

    if method == 'HEAD':
        if not known:
            abort(404)
        return 'OK'

    if method == 'GET':
        if not known:
            abort(404)
        dbapp = schema.App(app_id=app_id)
        dbapp.refresh(r)
        app_info = {
            'app_id': dbapp.app_id,
            'port': dbapp.port,
            'nodes': [],
            'unused_nodes': [],
        }
        # copy over whichever hash fields the app actually carries
        app_info.update(
            (k, getattr(dbapp, k)) for k in dbapp.HASH_KEYS if hasattr(dbapp, k))
        for node_id, role in dbapp.nodes.iteritems():
            node = schema.MonacoNode(node_id=node_id)
            node.refresh(r)
            app_info['nodes'].append({
                'host': node.hostname,
                'node_id': node_id,
                'role': role
            })
        app_info['unused_nodes'] = [
            node_id for node_id in monaco.node_ids if node_id not in dbapp.nodes
        ]
        return jsonify(app_info)

    if method == 'POST':
        if not known:
            # can't create with an app_id pre-specified.
            abort(400)
        dbapp = schema.App(app_id=app_id)
        dbapp.refresh(r)
        new_name = request.form['name']
        if new_name != monaco.service_by_app_id[app_id]:
            monaco.rename_app(app_id, new_name, r)
        job = {
            'command': 'update',
            'app_id': app_id,
        }
        for k in schema.App.HASH_KEYS:
            if k in request.form:
                job[k] = request.form[k]
        # these two become booleans: True when the field was submitted at all
        job['persist'] = 'persist' in job
        job['slavelb'] = 'slavelb' in job
        return jsonify(jid=job_queue.pushback_job(job))

    if method == 'DELETE':
        if not known:
            abort(404)
        dbapp = schema.App(app_id=app_id)
        dbapp.refresh(r)
        job = {
            'command': 'delete',
            'app_id': app_id,
        }
        return jsonify(jid=job_queue.pushback_job(job))
def main():
    '''
    This is a jazzier version of the node stats reporter.
    It will spin up N threads (where N = the number of app Masters on this node)
    Those threads will report stats on the config interval

    Starts one MonacoHandler, then loops forever: every 5 seconds the node
    is refreshed and the AppHandler / TwemHandler threads are reconciled
    with it (stopped when no longer needed, started when missing, replaced
    when dead). A redis error reconnects the management client; any other
    exception is logged and the loop carries on.
    '''
    r = redis.StrictRedis(port=config.config['mgmt_port'])
    monaco = schema.Monaco()
    monaco.refresh(r)
    host = config.config['hostname']
    node_id = monaco.node_ids_by_hostname[host]
    node = schema.MonacoNode(node_id=node_id)

    monaco_handler = MonacoHandler(node_id)
    monaco_handler.start()

    app_threadmap = {}
    twem_threadmap = {}
    while True:
        try:
            node.refresh(r)

            # Set up this node's master DB handlers
            for app_id in list(app_threadmap):
                if app_id in node.apps:
                    continue
                # child thread should die a natural, painless death
                app_threadmap[app_id].stop()
                del app_threadmap[app_id]
                STATLOGGER.debug('deleted %s', app_id)

            for app_id in node.apps:
                app = schema.App(app_id=app_id)
                app.refresh(r)
                tracked = app_id in app_threadmap
                if app.nodes[node.node_id] != 'master':
                    # only masters get a stats thread on this node
                    if tracked:
                        app_threadmap[app_id].stop()
                        del app_threadmap[app_id]
                        STATLOGGER.debug('deleted %s', app_id)
                    continue
                if not tracked:
                    # perhaps a new thing
                    app_threadmap[app_id] = AppHandler(app_id, node_id)
                    app_threadmap[app_id].start()
                    STATLOGGER.debug('started %s', app_id)
                elif not app_threadmap[app_id].is_alive():
                    del app_threadmap[app_id]
                    app_threadmap[app_id] = AppHandler(app_id, node_id)
                    app_threadmap[app_id].start()
                    STATLOGGER.info('restarted %s', app_id)

            # Set up this node's twem handlers
            for twem_id in list(twem_threadmap):
                if twem_id in node.twems:
                    continue
                # child thread should die a natural, painless death
                twem_threadmap[twem_id].stop()
                del twem_threadmap[twem_id]
                STATLOGGER.debug('deleted %s', twem_id)

            for twem_id in node.twems:
                twem = schema.MonacoTwem(twem_id=twem_id)
                twem.refresh(r)
                if twem_id not in twem_threadmap:
                    # perhaps a new thing
                    twem_threadmap[twem_id] = TwemHandler(twem_id, node_id, host)
                    twem_threadmap[twem_id].start()
                    STATLOGGER.debug('started %s', twem_id)
                elif not twem_threadmap[twem_id].is_alive():
                    del twem_threadmap[twem_id]
                    twem_threadmap[twem_id] = TwemHandler(twem_id, node_id, host)
                    twem_threadmap[twem_id].start()
                    STATLOGGER.info('restarted %s', twem_id)
        except redis.RedisError:
            # management connection trouble: get a fresh client for the next pass
            r = redis.StrictRedis(port=config.config['mgmt_port'])
        except Exception as exc:
            STATLOGGER.exception(exc)
        time.sleep(5)