def run(self):
    """Worker daemon body: wait for the database, then serve gearman jobs.

    Creates the DB manager from ``self.config['connections']['mysql']``,
    blocks until the required tables can be accessed, then starts two
    persistent gearman workers plus a heartbeat greenlet and waits on the
    module-level ``pool``.  Any failure in that second phase is logged as
    critical and the process exits with status 1.
    """
    global pool

    self.db_manager = util.DBManager(self.config['connections']['mysql'])
    db = self.db_manager.get_db()

    # XXX touching these attributes forces the tables to be loaded; keep
    # retrying every 5 seconds while the database is unreachable.
    tables_loaded = False
    while not tables_loaded:
        try:
            db.messages
            db.servers
            db.server_properties
            tables_loaded = True
        except db_exc.OperationalError:
            LOG.error(util.exc_info())
            time.sleep(5)
            LOG.debug('Reconnecting...')

    try:
        # Both workers are created before any of them is registered/spawned.
        workers = [_GMWorker(self) for _ in range(2)]
        for gm_worker in workers:
            gm_worker.register_task('message.send', send)
            pool.add(gevent.spawn(gm_worker.work))
        gevent.spawn(heartbeat, 10, self)
        pool.join()
    except:
        LOG.critical(util.exc_info())
        sys.exit(1)
def _get_mem_info(self, hsp):
    """Collect memory statistics through ``hsp.sysinfo.mem_info``.

    Returns ``{'mem': {...}}`` with the keys ``swap``, ``swapavail``,
    ``total``, ``avail``, ``free``, ``shared``, ``buffer`` and ``cached``.
    When the remote call fails the error is logged and every value read
    from the remote side is replaced by the placeholder ``'U'``.
    """
    try:
        raw = hsp.sysinfo.mem_info(timeout=1)
    except:
        LOG.error(util.exc_info())
        raw = dict.fromkeys(
            ('total_swap', 'avail_swap', 'total_real', 'total_free',
             'shared', 'buffer', 'cached'),
            'U')

    stats = {}
    stats['swap'] = raw['total_swap']
    stats['swapavail'] = raw['avail_swap']
    stats['total'] = raw['total_real']
    stats['avail'] = 0  # Fix Me
    stats['free'] = raw['total_free']
    stats['shared'] = raw['shared']
    stats['buffer'] = raw['buffer']
    stats['cached'] = raw['cached']
    return {'mem': stats}
def _get_la_stat(self, hsp):
    """Collect load averages through ``hsp.sysinfo.load_average``.

    Returns ``{'la': {'la1': ..., 'la5': ..., 'la15': ...}}`` built from
    the first three items of the reply.  On failure the error is logged
    and all three values become the placeholder ``'U'``.
    """
    try:
        load = hsp.sysinfo.load_average(timeout=1)
    except:
        LOG.error(util.exc_info())
        load = ['U'] * 3

    averages = {}
    averages['la1'] = load[0]
    averages['la5'] = load[1]
    averages['la15'] = load[2]
    return {'la': averages}
def _get_net_stat(self, hsp):
    """Collect network byte counters through ``hsp.sysinfo.net_stats``.

    Only the ``eth0`` entry of the reply is used.  Returns
    ``{"net": {"in": <received bytes>, "out": <transmitted bytes>}}``.
    On failure the error is logged and both values become ``"U"``.
    """
    try:
        interfaces = hsp.sysinfo.net_stats(timeout=1)
    except:
        LOG.error(util.exc_info())
        interfaces = {
            "eth0": {
                "receive": {"bytes": "U"},
                "transmit": {"bytes": "U"},
            }
        }

    received = interfaces["eth0"]["receive"]["bytes"]
    transmitted = interfaces["eth0"]["transmit"]["bytes"]
    return {"net": {"in": received, "out": transmitted}}
def _get_mem_info(self, hsp):
    """Collect memory statistics through ``hsp.sysinfo.mem_info``.

    Returns ``{"mem": {...}}`` with the keys ``swap``, ``swapavail``,
    ``total``, ``avail``, ``free``, ``shared``, ``buffer`` and ``cached``.
    When the remote call fails the error is logged and every value read
    from the remote side is replaced by the placeholder ``"U"``.
    """
    try:
        raw = hsp.sysinfo.mem_info(timeout=1)
    except:
        LOG.error(util.exc_info())
        raw = dict.fromkeys(
            ("total_swap", "avail_swap", "total_real", "total_free",
             "shared", "buffer", "cached"),
            "U",
        )

    stats = {}
    stats["swap"] = raw["total_swap"]
    stats["swapavail"] = raw["avail_swap"]
    stats["total"] = raw["total_real"]
    stats["avail"] = 0  # Fix Me
    stats["free"] = raw["total_free"]
    stats["shared"] = raw["shared"]
    stats["buffer"] = raw["buffer"]
    stats["cached"] = raw["cached"]
    return {"mem": stats}
def _get_la_stat(self, hsp):
    """Collect load averages through ``hsp.sysinfo.load_average``.

    Returns ``{"la": {"la1": ..., "la5": ..., "la15": ...}}`` built from
    the first three items of the reply.  On failure the error is logged
    and all three values become the placeholder ``"U"``.
    """
    try:
        load = hsp.sysinfo.load_average(timeout=1)
    except:
        LOG.error(util.exc_info())
        load = ["U"] * 3

    averages = {}
    averages["la1"] = load[0]
    averages["la5"] = load[1]
    averages["la15"] = load[2]
    return {"la": averages}
def _get_cpu_stat(self, hsp):
    """Collect CPU counters through ``hsp.sysinfo.cpu_stat``.

    Returns ``{"cpu": {"user": ..., "system": ..., "nice": ...,
    "idle": ...}}``.  On failure the error is logged and every value is
    replaced by the placeholder ``"U"``, the same marker the sibling
    collectors (memory, load average, network) use for missing data.
    """
    try:
        cpu = hsp.sysinfo.cpu_stat(timeout=1)
    except:
        LOG.error(util.exc_info())
        # "user" previously fell back to "******", unlike every other
        # field; all fields now share the same "U" placeholder.
        cpu = {"user": "U", "nice": "U", "system": "U", "idle": "U"}
    return {
        "cpu": {
            "user": cpu["user"],
            "system": cpu["system"],
            "nice": cpu["nice"],
            "idle": cpu["idle"],
        }
    }
def send(worker, raw_message_id):
    """Task callback: deliver the message whose id is carried by the job.

    ``worker.is_working`` is set for the duration of the call.  Errors
    raised by ``worker.send`` are logged and not propagated; the job
    payload (``raw_message_id.data``) is returned in every case.
    """
    worker.is_working = True
    job_text = str(raw_message_id)
    LOG.debug('Get message id: %s' % job_text)
    try:
        worker.send(raw_message_id.data)
    except:
        LOG.error(util.exc_info())
    finally:
        worker.is_working = False
    return raw_message_id.data
def work(self, time_to_work=None):
    """Run the gearman work loop, retrying on transient failures.

    If ``time_to_work`` is truthy, ``self._terminate_time`` is set to now
    plus that many seconds.  The worker registers itself in
    ``self.parent.gm_workers`` and unregisters once
    ``GearmanWorker.work`` returns.  Database and gearman connection
    errors are logged and retried after a 5 second gevent sleep; any
    other exception is logged and re-raised.
    """
    if time_to_work:
        self._terminate_time = time.time() + time_to_work

    self.parent.gm_workers.append(self)

    finished = False
    while not finished:
        try:
            gearman.GearmanWorker.work(self, poll_timeout=2)
            self.parent.gm_workers.remove(self)
            finished = True
        except (db_exc.OperationalError,
                gearman.errors.ConnectionError,
                gearman.errors.ServerUnavailable):
            # Transient: back off and re-enter the work loop.
            LOG.error(util.exc_info())
            gevent.sleep(5)
        except:
            LOG.error(util.exc_info())
            raise
def send(worker, raw_message_id):
    """Task callback: deliver the message whose id is carried by the job.

    ``worker.is_working`` is set for the duration of the call.  Errors
    raised by ``worker.send`` are logged and not propagated; the job
    payload (``raw_message_id.data``) is returned in every case.
    """
    worker.is_working = True
    job_text = str(raw_message_id)
    LOG.debug('Get message id: %s' % job_text)
    try:
        worker.send(raw_message_id.data)
    except:
        LOG.error(util.exc_info())
    finally:
        worker.is_working = False
    return raw_message_id.data
def main():
    """Command-line entry point of the messaging worker daemon.

    Parses the command line, loads the ``scalr.msg_sender.worker`` section
    of the YAML config file, calls ``configure`` and then starts, stops or
    restarts a ``MessagingWorker`` depending on the flag given.  Without a
    flag a usage hint is printed.

    Unexpected errors are printed, logged as critical and end the process
    with status 1.
    """
    try:
        parser = argparse.ArgumentParser()
        group = parser.add_mutually_exclusive_group()
        group.add_argument('--start', action='store_true', default=False,
                           help='start daemon')
        group.add_argument('--stop', action='store_true', default=False,
                           help='stop daemon')
        group.add_argument('--restart', action='store_true', default=False,
                           help='restart daemon')
        parser.add_argument('--pid', default=None, help="Pid file")
        parser.add_argument('-c', '--config', default=ETC_DIR+'/config.yml',
                            help='config file')
        parser.add_argument('-v', '--verbosity', action='count', default=0,
                            help='increase output verbosity [0:4]. default is 0')
        parser.add_argument('--version', action='version',
                            version='Version %s' %__version__)
        args = parser.parse_args()

        try:
            # Use a context manager so the config file handle is closed
            # instead of being left to the garbage collector.
            with open(args.config) as config_file:
                config = yaml.safe_load(config_file)['scalr']['msg_sender']['worker']
        except:
            sys.stderr.write('Error load config file %s. Exit\n' %args.config)
            sys.exit(1)

        configure(args, config)
        daemon = MessagingWorker(config)

        # Parenthesised single-argument print produces the same output
        # under the Python 2 print statement.
        if args.start:
            print('start')
            LOG.info('start')
            daemon.start()
        elif args.stop:
            print('stop')
            LOG.info('stop')
            daemon.stop()
        elif args.restart:
            print('restart')
            LOG.info('restart')
            daemon.restart()
        else:
            print('Usage %s -h' % sys.argv[0])
    except SystemExit:
        # NOTE(review): this also swallows the sys.exit(1) raised above on
        # a config error, so main() returns normally in that case.
        pass
    except:
        traceback.print_exc()
        LOG.critical(util.exc_info())
        sys.exit(1)
def _get_io_stat(self, hsp):
    """Collect per-device disk counters through ``hsp.sysinfo.disk_stats``.

    Returns ``{'io': {<device name as str>: {'read', 'write', 'rbyte',
    'wbyte'}}}``.  On failure the error is logged and the device mapping
    is empty.
    """
    try:
        disks = hsp.sysinfo.disk_stats(timeout=1)
    except:
        LOG.error(util.exc_info())
        disks = {}

    per_device = {}
    for dev in disks:
        per_device[str(dev)] = {
            'read': disks[dev]['read']['num'],
            'write': disks[dev]['write']['num'],
            'rbyte': disks[dev]['read']['bytes'],
            'wbyte': disks[dev]['write']['bytes'],
        }
    return {'io': per_device}
def _get_cpu_stat(self, hsp):
    """Collect CPU counters through ``hsp.sysinfo.cpu_stat``.

    Returns ``{'cpu': {'user': ..., 'system': ..., 'nice': ...,
    'idle': ...}}``.  On failure the error is logged and every value is
    replaced by the placeholder ``'U'``, the same marker the sibling
    collectors (memory, load average, network) use for missing data.
    """
    try:
        cpu = hsp.sysinfo.cpu_stat(timeout=1)
    except:
        LOG.error(util.exc_info())
        # 'user' previously fell back to '******', unlike every other
        # field; all fields now share the same 'U' placeholder.
        cpu = {'user': 'U', 'nice': 'U', 'system': 'U', 'idle': 'U'}
    return {
        'cpu': {
            'user': cpu['user'],
            'system': cpu['system'],
            'nice': cpu['nice'],
            'idle': cpu['idle'],
        }
    }
def _set_new_gm_server(self, block=False):
    """Pick the first gearman server from the rotation that answers a ping.

    The current ``self.gm_host`` (if any) is appended to ``self.gm_hosts``
    and each host is tried once per pass.  On success ``self.gm_host`` and
    ``self.gm_client`` point at the chosen server.

    block -- when true, sleep 5 seconds after an unsuccessful pass and try
             again indefinitely; otherwise raise
             ``gearman.errors.ServerUnavailable`` after one pass.
    """
    while True:
        if self.gm_host:
            self.gm_hosts.append(self.gm_host)

        found = False
        for _ in range(len(self.gm_hosts)):
            LOG.debug('Trying set new gearman server...')
            try:
                self.gm_host = self.gm_hosts.pop(0)
                admin = gearman.GearmanAdminClient([self.gm_host])
                admin.ping_server()
                # GearmanAdminClient.send_maxqueue is not called here:
                # python gearman client bug.
                self.gm_client = gearman.GearmanClient([self.gm_host])
                LOG.debug('Gearman server: %s' % self.gm_host)
                found = True
                break
            except gearman.errors.ServerUnavailable:
                # Put the dead host back at the end of the rotation.
                LOG.error('%s %s' % (self.gm_host, util.exc_info()))
                self.gm_hosts.append(self.gm_host)
                self.gm_host = None
            except:
                LOG.error(util.exc_info())
                raise

        if found:
            return
        if not block:
            LOG.error('Set new gearman server failed')
            raise gearman.errors.ServerUnavailable
        time.sleep(5)
def _set_new_gm_server(self, block=False):
    """Pick the first gearman server from the rotation that answers a ping.

    The current ``self.gm_host`` (if any) is appended to ``self.gm_hosts``
    and each host is tried once per pass.  On success ``self.gm_host`` and
    ``self.gm_client`` point at the chosen server.

    block -- when true, sleep 5 seconds after an unsuccessful pass and try
             again indefinitely; otherwise raise
             ``gearman.errors.ServerUnavailable`` after one pass.
    """
    while True:
        if self.gm_host:
            self.gm_hosts.append(self.gm_host)

        found = False
        for _ in range(len(self.gm_hosts)):
            LOG.debug("Trying set new gearman server...")
            try:
                self.gm_host = self.gm_hosts.pop(0)
                admin = gearman.GearmanAdminClient([self.gm_host])
                admin.ping_server()
                # GearmanAdminClient.send_maxqueue is not called here:
                # python gearman client bug.
                self.gm_client = gearman.GearmanClient([self.gm_host])
                LOG.debug("Gearman server: %s" % self.gm_host)
                found = True
                break
            except gearman.errors.ServerUnavailable:
                # Put the dead host back at the end of the rotation.
                LOG.error("%s %s" % (self.gm_host, util.exc_info()))
                self.gm_hosts.append(self.gm_host)
                self.gm_host = None
            except:
                LOG.error(util.exc_info())
                raise

        if found:
            return
        if not block:
            LOG.error("Set new gearman server failed")
            raise gearman.errors.ServerUnavailable
        time.sleep(5)
def main():
    """Command-line entry point of the messaging client daemon.

    Parses the command line, loads the ``scalr.msg_sender.client`` section
    of the YAML config file, calls ``configure`` and then starts, stops or
    restarts a ``MessagingClient`` depending on the flag given.  Without a
    flag a usage hint is printed.

    Unexpected errors are logged as critical and end the process with
    status 1.
    """
    try:
        parser = argparse.ArgumentParser()
        group = parser.add_mutually_exclusive_group()
        group.add_argument("--start", action="store_true", default=False, help="start daemon")
        group.add_argument("--stop", action="store_true", default=False, help="stop daemon")
        group.add_argument("--restart", action="store_true", default=False, help="restart daemon")
        parser.add_argument("--pid", default=None, help="Pid file")
        parser.add_argument("-c", "--config", default=ETC_DIR + "/config.yml", help="config file")
        parser.add_argument(
            "-v", "--verbosity", action="count", default=0, help="increase output verbosity [0:4]. default is 0"
        )
        parser.add_argument("--version", action="version", version="Version %s" % __version__)
        args = parser.parse_args()

        try:
            # Use a context manager so the config file handle is closed
            # instead of being left to the garbage collector.
            with open(args.config) as config_file:
                config = yaml.safe_load(config_file)["scalr"]["msg_sender"]["client"]
        except:
            sys.stderr.write("Error load config file %s. Exit\n" % args.config)
            sys.exit(1)

        configure(args, config)
        daemon = MessagingClient(config)

        # Parenthesised single-argument print produces the same output
        # under the Python 2 print statement.
        if args.start:
            print("start")
            LOG.info("start")
            daemon.start()
        elif args.stop:
            print("stop")
            LOG.info("stop")
            daemon.stop()
        elif args.restart:
            print("restart")
            LOG.info("restart")
            daemon.restart()
        else:
            print("Usage %s -h" % sys.argv[0])
    except SystemExit:
        # NOTE(review): this also swallows the sys.exit(1) raised above on
        # a config error, so main() returns normally in that case.
        pass
    except:
        LOG.critical(util.exc_info())
        sys.exit(1)
def _get_io_stat(self, hsp):
    """Collect per-device disk counters through ``hsp.sysinfo.disk_stats``.

    Returns ``{"io": {<device name as str>: {"read", "write", "rbyte",
    "wbyte"}}}``.  On failure the error is logged and the device mapping
    is empty.
    """
    try:
        disks = hsp.sysinfo.disk_stats(timeout=1)
    except:
        LOG.error(util.exc_info())
        disks = {}

    per_device = {}
    for dev in disks:
        per_device[str(dev)] = {
            "read": disks[dev]["read"]["num"],
            "write": disks[dev]["write"]["num"],
            "rbyte": disks[dev]["read"]["bytes"],
            "wbyte": disks[dev]["write"]["bytes"],
        }
    return {"io": per_device}
def _get_net_stat(self, hsp):
    """Collect network byte counters through ``hsp.sysinfo.net_stats``.

    Only the ``eth0`` entry of the reply is used.  Returns
    ``{'net': {'in': <received bytes>, 'out': <transmitted bytes>}}``.
    On failure the error is logged and both values become ``'U'``.
    """
    try:
        interfaces = hsp.sysinfo.net_stats(timeout=1)
    except:
        LOG.error(util.exc_info())
        interfaces = {
            'eth0': {
                'receive': {'bytes': 'U'},
                'transmit': {'bytes': 'U'},
            }
        }

    received = interfaces['eth0']['receive']['bytes']
    transmitted = interfaces['eth0']['transmit']['bytes']
    return {'net': {'in': received, 'out': transmitted}}
def run(self):
    """Client daemon body: poll pending messages and submit gearman jobs.

    Every ``timestep`` seconds the loop pings the current gearman server,
    refreshes ``self.submitted_jobs`` and queries up to ``QSIZE`` outgoing
    messages (type "out", status 0, message_version 2) whose back-off
    time has elapsed and that are not already submitted.  Each one is
    submitted as a ``message.send`` job and recorded in
    ``self.submitted_jobs``.

    Database errors are logged and retried after 5 seconds; an unavailable
    gearman server triggers ``_set_new_gm_server(block=True)``; any other
    exception is logged and re-raised.  The session is closed and removed
    after every iteration.
    """
    db_manager = util.DBManager(self.config["connections"]["mysql"])
    db = db_manager.get_db()

    self._set_new_gm_server(block=True)

    timestep = 5

    while True:
        session = db.session
        try:
            gm_adm_client = gearman.GearmanAdminClient([self.gm_host])
            gm_adm_client.ping_server()

            # fix gearman v2.0.2 memory leak bug
            self.gm_client = gearman.GearmanClient([self.gm_host])

            self._update_submitted_jobs()

            if len(self.submitted_jobs) > 5000:
                LOG.warning("Too much of a submitted jobs. Skip iteration")
                time.sleep(timestep)
                continue

            criteria = [
                and_(db.messages.type == "out", db.messages.status == 0, db.messages.message_version == 2),
                # Back-off: last attempt + attempts * CRATIO must be in the past.
                and_(
                    func.unix_timestamp(db.messages.dtlasthandleattempt) + db.messages.handle_attempts * CRATIO
                    < func.unix_timestamp(func.now())
                ),
            ]
            if self.submitted_jobs:
                # Skip messages that already have a job in flight.
                criteria.append(and_(not_(db.messages.messageid.in_(self.submitted_jobs.keys()))))

            # LIMIT in SQL instead of loading every matching row and
            # slicing the resulting list in Python.
            msgs = (
                session.query(db.messages.messageid, db.messages.handle_attempts)
                .filter(*criteria)
                .order_by(asc(db.messages.id))
                .limit(QSIZE)
                .all()
            )

            for msg in msgs:
                # The job's unique key is the first 64 characters of the
                # message id.
                req = self.gm_client.submit_job(
                    "message.send", msg.messageid, unique=msg.messageid[0:64], wait_until_complete=False
                )
                self.gm_client.wait_until_jobs_accepted([req])
                self.submitted_jobs.update(
                    {msg.messageid: (req, int(time.time() + CRATIO * (msg.handle_attempts + 1)))}
                )
                LOG.info("Sumbit message: msg_id:%s" % msg.messageid)
        except db_exc.OperationalError:
            LOG.error(util.exc_info())
            time.sleep(5)
        except gearman.errors.ServerUnavailable:
            LOG.error(util.exc_info())
            self._set_new_gm_server(block=True)
        except:
            LOG.error(util.exc_info())
            raise
        finally:
            session.close()
            session.remove()

        time.sleep(timestep)
def main():
    """Command-line entry point of the messaging worker daemon.

    Parses the command line, loads the ``scalr.msg_sender.worker`` section
    of the YAML config file, calls ``configure`` and then starts, stops or
    restarts a ``MessagingWorker`` depending on the flag given.  Without a
    flag a usage hint is printed.

    Unexpected errors are printed, logged as critical and end the process
    with status 1.
    """
    try:
        parser = argparse.ArgumentParser()
        group = parser.add_mutually_exclusive_group()
        group.add_argument('--start', action='store_true', default=False,
                           help='start daemon')
        group.add_argument('--stop', action='store_true', default=False,
                           help='stop daemon')
        group.add_argument('--restart', action='store_true', default=False,
                           help='restart daemon')
        parser.add_argument('--pid', default=None, help="Pid file")
        parser.add_argument('-c', '--config', default=ETC_DIR + '/config.yml',
                            help='config file')
        parser.add_argument(
            '-v', '--verbosity', action='count', default=0,
            help='increase output verbosity [0:4]. default is 0')
        parser.add_argument('--version', action='version',
                            version='Version %s' % __version__)
        args = parser.parse_args()

        try:
            # Use a context manager so the config file handle is closed
            # instead of being left to the garbage collector.
            with open(args.config) as config_file:
                config = yaml.safe_load(config_file)['scalr']['msg_sender']['worker']
        except:
            sys.stderr.write('Error load config file %s. Exit\n' % args.config)
            sys.exit(1)

        configure(args, config)
        daemon = MessagingWorker(config)

        # Parenthesised single-argument print produces the same output
        # under the Python 2 print statement.
        if args.start:
            print('start')
            LOG.info('start')
            daemon.start()
        elif args.stop:
            print('stop')
            LOG.info('stop')
            daemon.stop()
        elif args.restart:
            print('restart')
            LOG.info('restart')
            daemon.restart()
        else:
            print('Usage %s -h' % sys.argv[0])
    except SystemExit:
        # NOTE(review): this also swallows the sys.exit(1) raised above on
        # a config error, so main() returns normally in that case.
        pass
    except:
        traceback.print_exc()
        LOG.critical(util.exc_info())
        sys.exit(1)
def send(self, message_id):
    """Deliver one stored message to its server over HTTP.

    Looks up the message by ``message_id`` and its server, encrypts the
    message body with the server's ``scalarizr.key`` property and POSTs it
    to ``http://<remote_ip>:<port>/control``.  The message row is updated
    with the outcome (status, handle_attempts, dtlasthandleattempt).

    Returns None in every case; unexpected exceptions inside the main
    block are logged and not propagated.
    """
    LOG.debug('Processing message_id: %s' % message_id)
    self._spawn_gm_worker()
    db = self.parent.db_manager.get_db()
    msg = db.messages.filter_by(messageid=message_id).first()
    if not msg:
        LOG.warning('Message with message_id: %s not found' % message_id)
        db.session.close()
        db.session.remove()
        return
    try:
        srv = db.servers.filter_by(server_id=msg.server_id).first()
        if not srv:
            # Unknown server: count the attempt; status becomes 3 once
            # three attempts have been made, otherwise stays 0.
            LOG.warning('Server with server_id: %s not found' % msg.server_id)
            msg.handle_attempts += 1
            msg.status = 0 if msg.handle_attempts < 3 else 3
            msg.dtlasthandleattempt = func.now()
            db.commit()
            db.session.close()
            db.session.remove()
            return
        if not _server_is_active(srv):
            # Inactive server: set attempts and status to 3 immediately.
            LOG.warning('Server not active: %s' % message_id)
            msg.handle_attempts = 3
            msg.status = 3
            msg.dtlasthandleattempt = func.now()
            db.commit()
            db.session.close()
            db.session.remove()
            return
        where = and_(db.server_properties.server_id == msg.server_id, db.server_properties.name == 'scalarizr.key')
        key = db.server_properties.filter(where).first().value
        try:
            where = and_(
                db.server_properties.server_id == msg.server_id,
                db.server_properties.name == 'scalarizr.ctrl_port')
            port = db.server_properties.filter(where).first().value
        except:
            # No control port property could be read: use 8013.
            port = 8013
        # The session is released before the HTTP request is made.
        db.session.close()
        db.session.remove()
        data, headers = _encrypt(msg.server_id, key, msg.message)
        url = 'http://%s:%s/%s' % (srv.remote_ip, port, 'control')
        req = urllib2.Request(url, data, headers)
        try:
            # Run the request in a greenlet and wait at most 5 seconds.
            g = gevent.spawn(urllib2.urlopen, req)
            g.get(timeout=5)
            if g.value.code != 201:
                raise DeliveryError(g.value.code, 'Delivery failed')
        except:
            LOG.warning('Delivery failed: %s %s' % (message_id, util.exc_info()))
            g.kill()
            # Re-attach msg to the session so the update below is saved.
            db.session.add(msg)
            msg.handle_attempts += 1
            msg.status = 0 if msg.handle_attempts < 3 else 3
            msg.dtlasthandleattempt = func.now()
            db.commit()
            db.session.close()
            db.session.remove()
            return
        # Delivered: set status 1 and clear the stored body; 'ExecScript'
        # messages are deleted.
        db.session.add(msg)
        msg.status = 1
        msg.message = ''
        if msg.message_name == 'ExecScript':
            db.delete(msg)
        msg.dtlasthandleattempt = func.now()
        db.commit()
        db.session.close()
        db.session.remove()
    except:
        # NOTE(review): the session is not closed on this path.
        LOG.error(util.exc_info())
    finally:
        # Don't close or remove session in finally
        #db.session.close()
        #db.session.remove()
        pass
def run(self):
    """Client daemon body: poll pending messages and submit gearman jobs.

    Every ``timestep`` seconds the loop pings the current gearman server,
    refreshes ``self.submitted_jobs`` and queries up to ``QSIZE`` outgoing
    messages (type 'out', status 0, message_version 2) whose back-off
    time has elapsed and that are not already submitted.  Each one is
    submitted as a ``message.send`` job and recorded in
    ``self.submitted_jobs``.

    Database errors are logged and retried after 5 seconds; an unavailable
    gearman server triggers ``_set_new_gm_server(block=True)``; any other
    exception is logged and re-raised.  The session is closed and removed
    after every iteration.
    """
    db_manager = util.DBManager(self.config['connections']['mysql'])
    db = db_manager.get_db()

    self._set_new_gm_server(block=True)

    timestep = 5

    while True:
        session = db.session
        try:
            gm_adm_client = gearman.GearmanAdminClient([self.gm_host])
            gm_adm_client.ping_server()

            # fix gearman v2.0.2 memory leak bug
            self.gm_client = gearman.GearmanClient([self.gm_host])

            self._update_submitted_jobs()

            if len(self.submitted_jobs) > 5000:
                LOG.warning('Too much of a submitted jobs. Skip iteration')
                time.sleep(timestep)
                continue

            criteria = [
                and_(db.messages.type == 'out',
                     db.messages.status == 0,
                     db.messages.message_version == 2),
                # Back-off: last attempt + attempts * CRATIO must be in the past.
                and_(func.unix_timestamp(db.messages.dtlasthandleattempt) +
                     db.messages.handle_attempts * CRATIO <
                     func.unix_timestamp(func.now())),
            ]
            if self.submitted_jobs:
                # Skip messages that already have a job in flight.
                criteria.append(
                    and_(not_(db.messages.messageid.in_(
                        self.submitted_jobs.keys()))))

            # LIMIT in SQL instead of loading every matching row and
            # slicing the resulting list in Python.
            msgs = session.query(
                db.messages.messageid,
                db.messages.handle_attempts).filter(
                    *criteria).order_by(
                        asc(db.messages.id)).limit(QSIZE).all()

            for msg in msgs:
                # The job's unique key is the first 64 characters of the
                # message id.
                req = self.gm_client.submit_job('message.send',
                                                msg.messageid,
                                                unique=msg.messageid[0:64],
                                                wait_until_complete=False)
                self.gm_client.wait_until_jobs_accepted([req])
                self.submitted_jobs.update({
                    msg.messageid:
                    (req,
                     int(time.time() + CRATIO * (msg.handle_attempts + 1)))
                })
                LOG.info('Sumbit message: msg_id:%s' % msg.messageid)
        except db_exc.OperationalError:
            LOG.error(util.exc_info())
            time.sleep(5)
        except gearman.errors.ServerUnavailable:
            LOG.error(util.exc_info())
            self._set_new_gm_server(block=True)
        except:
            LOG.error(util.exc_info())
            raise
        finally:
            session.close()
            session.remove()

        time.sleep(timestep)
def __call__(self, mode, metric):
    """Run ``self._work(mode, metric)``; log any error and do not raise."""
    try:
        self._work(mode, metric)
    except:
        failure = util.exc_info()
        LOG.error(failure)
    return None
def main():
    """Command-line entry point of the stats poller.

    Parses the command line, loads the ``scalr.stats_poller`` section of
    the YAML config file and resolves the polling mode and metric list
    (command line first, then config, then defaults ``snmp`` and
    cpu/la/mem/net).  Metrics not supported by the selected mode are
    dropped.  ``scheduler`` is then run once, or repeatedly every
    ``--interval`` seconds when an interval is given.

    Database operational errors inside an iteration are logged and the
    loop continues; other errors are logged as critical and end the
    process with status 1.  KeyboardInterrupt ends it with status 0.
    """
    try:
        parser = argparse.ArgumentParser()
        parser.add_argument(
            "-i", "--interval", type=int, default=0, help="execution interval in seconds. Default is 0 - exec once"
        )
        parser.add_argument("--pid", default=None, help="Pid file")
        parser.add_argument(
            "-m",
            "--metric",
            choices=["cpu", "la", "mem", "net", "io"],
            action="append",
            # argparse collapses runs of whitespace in help text, so the
            # rendered help is the same as with the original line
            # continuation inside the literal.
            help="metric type for processing. "
            "'io' metric is supported only in a 'scalarizr_api mode'",
        )
        parser.add_argument("-M", "--mode", choices=["snmp", "scalarizr_api"])
        parser.add_argument("-c", "--config", default=ETC_DIR + "/config.yml", help="config file")
        parser.add_argument(
            "-v", "--verbosity", action="count", default=0, help="increase output verbosity [0:4]. default is 0"
        )
        parser.add_argument("--version", action="version", version="Version %s" % __version__)
        args = parser.parse_args()

        try:
            # Use a context manager so the config file handle is closed
            # instead of being left to the garbage collector.
            with open(args.config) as config_file:
                config = yaml.safe_load(config_file)["scalr"]["stats_poller"]
        except:
            sys.stderr.write("Error load config file %s. Exit\n" % args.config)
            sys.exit(1)

        # Command line wins over config, config wins over the default.
        if args.mode:
            mode = args.mode
        elif "mode" in config:
            mode = config["mode"]
        else:
            mode = "snmp"

        if args.metric:
            metric = args.metric
        elif "metric" in config:
            metric = config["metric"]
        else:
            metric = ["cpu", "la", "mem", "net"]

        # Keep only the metrics the selected mode supports.
        if mode == "snmp":
            metric = list(set(metric).intersection(set(["cpu", "la", "mem", "net"])))
        elif mode == "scalarizr_api":
            metric = list(set(metric).intersection(set(["cpu", "la", "mem", "net", "io"])))

        configure(args, config)
        check_pid()

        LOG.debug("Start")
        while True:
            start_time = time.time()
            LOG.info("Start time: %s" % time.ctime())
            try:
                scheduler(config["connections"]["mysql"], mode, metric)
            except KeyboardInterrupt:
                raise
            except db_exc.OperationalError:
                LOG.error(util.exc_info())
            except:
                LOG.error(util.exc_info())
                raise
            LOG.info("Working time: %s" % (time.time() - start_time))
            if not args.interval:
                break
            # Sleep for whatever is left of the interval, if anything.
            sleep_time = start_time + args.interval - time.time()
            if sleep_time > 0:
                time.sleep(sleep_time)
        LOG.debug("Exit")
    except KeyboardInterrupt:
        LOG.critical("KeyboardInterrupt")
        sys.exit(0)
    except SystemExit:
        # NOTE(review): this also swallows the sys.exit(1) raised above on
        # a config error, so main() returns normally in that case.
        pass
    except:
        LOG.critical(util.exc_info())
        sys.exit(1)
def send(self, message_id):
    """Deliver one stored message to its server over HTTP.

    Looks up the message by ``message_id`` and its server, encrypts the
    message body with the server's ``scalarizr.key`` property and POSTs it
    to ``http://<remote_ip>:<port>/control``.  The message row is updated
    with the outcome (status, handle_attempts, dtlasthandleattempt).

    Returns None in every case; unexpected exceptions inside the main
    block are logged and not propagated.
    """
    LOG.debug('Processing message_id: %s' %message_id)
    self._spawn_gm_worker()
    db = self.parent.db_manager.get_db()
    msg = db.messages.filter_by(messageid=message_id).first()
    if not msg:
        LOG.warning('Message with message_id: %s not found' %message_id)
        db.session.close()
        db.session.remove()
        return
    try:
        srv = db.servers.filter_by(server_id=msg.server_id).first()
        if not srv:
            # Unknown server: count the attempt; status becomes 3 once
            # three attempts have been made, otherwise stays 0.
            LOG.warning('Server with server_id: %s not found' %msg.server_id)
            msg.handle_attempts += 1
            msg.status = 0 if msg.handle_attempts < 3 else 3
            msg.dtlasthandleattempt = func.now()
            db.commit()
            db.session.close()
            db.session.remove()
            return
        if not _server_is_active(srv):
            # Inactive server: set attempts and status to 3 immediately.
            LOG.warning('Server not active: %s' %message_id)
            msg.handle_attempts = 3
            msg.status = 3
            msg.dtlasthandleattempt = func.now()
            db.commit()
            db.session.close()
            db.session.remove()
            return
        where = and_(
            db.server_properties.server_id==msg.server_id,
            db.server_properties.name=='scalarizr.key')
        key = db.server_properties.filter(where).first().value
        try:
            where = and_(
                db.server_properties.server_id==msg.server_id,
                db.server_properties.name=='scalarizr.ctrl_port')
            port = db.server_properties.filter(where).first().value
        except:
            # No control port property could be read: use 8013.
            port = 8013
        # The session is released before the HTTP request is made.
        db.session.close()
        db.session.remove()
        data, headers = _encrypt(msg.server_id, key, msg.message)
        url = 'http://%s:%s/%s' % (srv.remote_ip, port, 'control')
        req = urllib2.Request(url, data, headers)
        try:
            # Run the request in a greenlet and wait at most 5 seconds.
            g = gevent.spawn(urllib2.urlopen, req)
            g.get(timeout=5)
            if g.value.code != 201:
                raise DeliveryError(g.value.code, 'Delivery failed')
        except:
            LOG.warning('Delivery failed: %s %s' %(message_id, util.exc_info()))
            g.kill()
            # Re-attach msg to the session so the update below is saved.
            db.session.add(msg)
            msg.handle_attempts += 1
            msg.status = 0 if msg.handle_attempts < 3 else 3
            msg.dtlasthandleattempt = func.now()
            db.commit()
            db.session.close()
            db.session.remove()
            return
        # Delivered: set status 1 and clear the stored body; 'ExecScript'
        # messages are deleted.
        db.session.add(msg)
        msg.status = 1
        msg.message = ''
        if msg.message_name == 'ExecScript':
            db.delete(msg)
        msg.dtlasthandleattempt = func.now()
        db.commit()
        db.session.close()
        db.session.remove()
    except:
        # NOTE(review): the session is not closed on this path.
        LOG.error(util.exc_info())
    finally:
        # Don't close or remove session in finally
        #db.session.close()
        #db.session.remove()
        pass
def main():
    """Command-line entry point of the stats poller.

    Parses the command line, loads the ``scalr.stats_poller`` section of
    the YAML config file and resolves the polling mode and metric list
    (command line first, then config, then defaults ``snmp`` and
    cpu/la/mem/net).  Metrics not supported by the selected mode are
    dropped.  ``scheduler`` is then run once, or repeatedly every
    ``--interval`` seconds when an interval is given.

    Database operational errors inside an iteration are logged and the
    loop continues; other errors are logged as critical and end the
    process with status 1.  KeyboardInterrupt ends it with status 0.
    """
    try:
        parser = argparse.ArgumentParser()
        parser.add_argument(
            '-i', '--interval', type=int, default=0,
            help="execution interval in seconds. Default is 0 - exec once")
        parser.add_argument('--pid', default=None, help="Pid file")
        parser.add_argument(
            '-m', '--metric',
            choices=['cpu', 'la', 'mem', 'net', 'io'],
            action='append',
            # argparse collapses runs of whitespace in help text, so the
            # rendered help is the same as with the original line
            # continuation inside the literal.
            help="metric type for processing. "
                 "'io' metric is supported only in a 'scalarizr_api mode'")
        parser.add_argument('-M', '--mode', choices=['snmp', 'scalarizr_api'])
        parser.add_argument('-c', '--config', default=ETC_DIR + '/config.yml',
                            help='config file')
        parser.add_argument(
            '-v', '--verbosity', action='count', default=0,
            help='increase output verbosity [0:4]. default is 0')
        parser.add_argument('--version', action='version',
                            version='Version %s' % __version__)
        args = parser.parse_args()

        try:
            # Use a context manager so the config file handle is closed
            # instead of being left to the garbage collector.
            with open(args.config) as config_file:
                config = yaml.safe_load(config_file)['scalr']['stats_poller']
        except:
            sys.stderr.write('Error load config file %s. Exit\n' % args.config)
            sys.exit(1)

        # Command line wins over config, config wins over the default.
        if args.mode:
            mode = args.mode
        elif 'mode' in config:
            mode = config['mode']
        else:
            mode = 'snmp'

        if args.metric:
            metric = args.metric
        elif 'metric' in config:
            metric = config['metric']
        else:
            metric = ['cpu', 'la', 'mem', 'net']

        # Keep only the metrics the selected mode supports.
        if mode == 'snmp':
            metric = list(
                set(metric).intersection(set(['cpu', 'la', 'mem', 'net'])))
        elif mode == 'scalarizr_api':
            metric = list(
                set(metric).intersection(set(['cpu', 'la', 'mem', 'net', 'io'])))

        configure(args, config)
        check_pid()

        LOG.debug('Start')
        while True:
            start_time = time.time()
            LOG.info('Start time: %s' % time.ctime())
            try:
                scheduler(config['connections']['mysql'], mode, metric)
            except KeyboardInterrupt:
                raise
            except db_exc.OperationalError:
                LOG.error(util.exc_info())
            except:
                LOG.error(util.exc_info())
                raise
            LOG.info('Working time: %s' % (time.time() - start_time))
            if not args.interval:
                break
            # Sleep for whatever is left of the interval, if anything.
            sleep_time = start_time + args.interval - time.time()
            if sleep_time > 0:
                time.sleep(sleep_time)
        LOG.debug('Exit')
    except KeyboardInterrupt:
        LOG.critical('KeyboardInterrupt')
        sys.exit(0)
    except SystemExit:
        # NOTE(review): this also swallows the sys.exit(1) raised above on
        # a config error, so main() returns normally in that case.
        pass
    except:
        LOG.critical(util.exc_info())
        sys.exit(1)