def publish_container_stats():
    """
    Publish custom cloudwatch statistics. Used for status monitoring and auto scaling.

    Publishes, in order: the active container count, memory use, disk use
    (growth since the first reading taken by this process), container slot
    use, disk id use, and an overall "Load" value that is the maximum of all
    the percentages. The CPU percentage is averaged with the previous sample
    and only contributes to "Load"; it is not published on its own here.
    """
    nactive = JBoxContainer.num_active()
    CloudHost.publish_stats("NumActiveContainers", "Count", nactive)

    # Smooth the CPU reading by averaging it with the previously stored sample.
    curr_cpu_used_pct = psutil.cpu_percent()
    if JBoxContainer.LAST_CPU_PCT is None:
        last_cpu_used_pct = curr_cpu_used_pct
    else:
        last_cpu_used_pct = JBoxContainer.LAST_CPU_PCT
    JBoxContainer.LAST_CPU_PCT = curr_cpu_used_pct
    cpu_used_pct = int((curr_cpu_used_pct + last_cpu_used_pct) / 2)

    mem_used_pct = psutil.virtual_memory().percent
    CloudHost.publish_stats("MemUsed", "Percent", mem_used_pct)

    # Take the fullest partition, skipping paths the volume manager owns.
    disk_used_pct = 0
    for part in psutil.disk_partitions():
        if not VolMgr.is_mount_path(part.mountpoint):
            try:
                disk_used_pct = max(psutil.disk_usage(part.mountpoint).percent, disk_used_pct)
            except Exception:
                # Best effort: a partition that cannot be queried is skipped.
                # Only Exception is caught so interpreter exit and keyboard
                # interrupts are not swallowed.
                pass
    # Report disk use relative to the first value seen by this process.
    if JBoxContainer.INITIAL_DISK_USED_PCT is None:
        JBoxContainer.INITIAL_DISK_USED_PCT = disk_used_pct
    disk_used_pct = max(0, (disk_used_pct - JBoxContainer.INITIAL_DISK_USED_PCT))
    CloudHost.publish_stats("DiskUsed", "Percent", disk_used_pct)

    cont_load_pct = min(100, max(0, nactive * 100 / JBoxContainer.MAX_CONTAINERS))
    CloudHost.publish_stats("ContainersUsed", "Percent", cont_load_pct)

    # Sample once so the published value and the one folded into "Load" agree.
    disk_ids_used_pct = VolMgr.used_pct()
    CloudHost.publish_stats("DiskIdsUsed", "Percent", disk_ids_used_pct)

    overall_load_pct = max(cont_load_pct, disk_used_pct, mem_used_pct, cpu_used_pct, disk_ids_used_pct)
    CloudHost.publish_stats("Load", "Percent", overall_load_pct)
def collect_stats():
    """
    Publish volume manager and database statistics, then record the
    collection date against this installation's id.

    JBoxd.finish_thread() is always called afterwards, even when one of the
    steps raises.
    """
    try:
        for publisher in (VolMgr, db):
            publisher.publish_stats()
        install_id = CloudHost.INSTALL_ID
        JBoxDynConfig.set_stat_collected_date(install_id)
    finally:
        JBoxd.finish_thread()
def init():
    """
    Read the configuration and set up logging, the cloud host layer and the
    volume manager.

    The 'backup_bucket' entry of the cloud host configuration is overwritten
    with a fixed bucket name, and S3 support is always switched on regardless
    of the configured 's3' value.
    """
    dckr = docker.Client()
    cfg = read_config()
    cloud_cfg = cfg['cloud_host']
    cloud_cfg['backup_bucket'] = "juliabox_userbackup"

    LoggerMixin.setup_logger(level=cfg['root_log_level'])
    LoggerMixin.DEFAULT_LEVEL = cfg['jbox_log_level']

    # S3 is forced on here; cloud_cfg['s3'] is deliberately not consulted.
    cloud_opts = {'has_s3': True}
    for feature in ('dynamodb', 'cloudwatch', 'autoscale', 'route53', 'ebs', 'ses'):
        cloud_opts['has_' + feature] = cloud_cfg[feature]
    for key in ('scale_up_at_load', 'scale_up_policy', 'autoscale_group',
                'route53_domain', 'region', 'install_id'):
        cloud_opts[key] = cloud_cfg[key]
    CloudHost.configure(**cloud_opts)

    VolMgr.configure(dckr, cfg)
def __init__(self):
    """
    Configure the subsystems, build the tornado application with its routes
    and plugin handlers, bind it to the manager port, and create the periodic
    housekeeping and signal callbacks.
    """
    # Subsystems are configured in this fixed order.
    for component in (LoggerMixin, db, Compute, SessContainer, VolMgr, JBoxAsyncJob):
        component.configure()
    JBoxAsyncJob.init(JBoxAsyncJob.MODE_PUB)

    routes = [
        (r"/", MainHandler),
        (r"/jboxadmin/", AdminHandler),
        (r"/jboxping/", PingHandler),
        (r"/jboxcors/", CorsHandler),
    ]
    app = tornado.web.Application(handlers=routes)
    self.application = app
    JBPluginHandler.add_plugin_handlers(app)
    JBPluginUI.create_include_files()

    # use sesskey as cookie secret to be able to span multiple tornado servers
    app.settings["cookie_secret"] = JBoxCfg.get('sesskey')
    app.settings["plugin_features"] = JBox.get_pluggedin_features()

    # Listen on the host name first, then on localhost, on the same port.
    for address in (socket.gethostname(), 'localhost'):
        app.listen(JBoxCfg.get('interactive.manager_port'), address=address)

    self.ioloop = tornado.ioloop.IOLoop.instance()

    # run container maintenance every 5 minutes
    run_interval = 5 * 60 * 1000
    interval_minutes = run_interval / (60 * 1000)
    self.log_info("Container maintenance every " + str(interval_minutes) + " minutes")
    self.ct = tornado.ioloop.PeriodicCallback(JBox.do_housekeeping, run_interval, self.ioloop)
    self.sigct = tornado.ioloop.PeriodicCallback(JBox.do_signals, 1000, self.ioloop)
def run(self):
    """
    Daemon main loop: apply a pending user home image update (without
    fetching), then receive commands from the queue forever and schedule the
    matching JBoxd handler on a thread.

    Unknown commands are logged and skipped.
    """
    if VolMgr.has_update_for_user_home_image():
        VolMgr.update_user_home_image(fetch=False)

    while True:
        self.log_debug("JBox daemon waiting for commands...")
        cmd, data = self.queue.recv()

        # Each branch picks the handler and the argument tuple it expects.
        if cmd == JBoxAsyncJob.CMD_BACKUP_CLEANUP:
            args, fn = (data,), JBoxd.backup_and_cleanup
        elif cmd == JBoxAsyncJob.CMD_LAUNCH_SESSION:
            args, fn = (data[0], data[1], data[2]), JBoxd.launch_session
        elif cmd == JBoxAsyncJob.CMD_AUTO_ACTIVATE:
            args, fn = (), JBoxd.auto_activate
        elif cmd == JBoxAsyncJob.CMD_UPDATE_USER_HOME_IMAGE:
            args, fn = (), JBoxd.update_user_home_image
        elif cmd == JBoxAsyncJob.CMD_REFRESH_DISKS:
            args, fn = (), JBoxd.refresh_disks
        elif cmd == JBoxAsyncJob.CMD_COLLECT_STATS:
            args, fn = (), JBoxd.collect_stats
        else:
            self.log_error("Unknown command " + str(cmd))
            continue

        JBoxd.schedule_thread(cmd, fn, args)
def run(self):
    """
    Daemon main loop.

    A pending user home image update is applied first (without fetching).
    After that the loop blocks on the queue, looks the received command up in
    an ordered table and schedules the matching JBoxd handler on a thread.
    Commands not present in the table are logged as errors and ignored.
    """
    if VolMgr.has_update_for_user_home_image():
        VolMgr.update_user_home_image(fetch=False)

    def no_args(_payload):
        return ()

    # (command, handler, argument builder) rows, matched top to bottom.
    dispatch = (
        (JBoxAsyncJob.CMD_BACKUP_CLEANUP, JBoxd.backup_and_cleanup,
         lambda payload: (payload, )),
        (JBoxAsyncJob.CMD_LAUNCH_SESSION, JBoxd.launch_session,
         lambda payload: (payload[0], payload[1], payload[2])),
        (JBoxAsyncJob.CMD_AUTO_ACTIVATE, JBoxd.auto_activate, no_args),
        (JBoxAsyncJob.CMD_UPDATE_USER_HOME_IMAGE, JBoxd.update_user_home_image, no_args),
        (JBoxAsyncJob.CMD_REFRESH_DISKS, JBoxd.refresh_disks, no_args),
        (JBoxAsyncJob.CMD_COLLECT_STATS, JBoxd.collect_stats, no_args),
    )

    while True:
        self.log_debug("JBox daemon waiting for commands...")
        cmd, data = self.queue.recv()
        for known_cmd, fn, build_args in dispatch:
            if cmd == known_cmd:
                JBoxd.schedule_thread(cmd, fn, build_args(data))
                break
        else:
            self.log_error("Unknown command " + str(cmd))
def __init__(self):
    """
    Set up the daemon from the configuration file: logging, database, cloud
    host layer, volume manager and the container layer (async job channel in
    MODE_SUB), then copy the user activation settings onto JBoxd.
    """
    dckr = docker.Client()
    cfg = read_config()
    cloud_cfg = cfg['cloud_host']
    user_activation_cfg = cfg['user_activation']

    LoggerMixin.setup_logger(level=cfg['root_log_level'])
    LoggerMixin.DEFAULT_LEVEL = cfg['jbox_log_level']

    db.configure_db(cfg)

    # Feature switches map to has_<name>; the rest are passed through as is.
    cloud_opts = {}
    for feature in ('s3', 'dynamodb', 'cloudwatch', 'autoscale', 'route53', 'ebs', 'ses'):
        cloud_opts['has_' + feature] = cloud_cfg[feature]
    for key in ('scale_up_at_load', 'scale_up_policy', 'autoscale_group',
                'route53_domain', 'region', 'install_id'):
        cloud_opts[key] = cloud_cfg[key]
    CloudHost.configure(**cloud_opts)

    VolMgr.configure(dckr, cfg)
    JBoxContainer.configure(dckr,
                            cfg['docker_image'],
                            cfg['mem_limit'],
                            cfg['cpu_limit'],
                            cfg['numlocalmax'],
                            cfg['async_job_port'],
                            async_mode=JBoxAsyncJob.MODE_SUB)
    self.log_debug("Backup daemon listening on port: " + str(cfg['async_job_port']))
    self.queue = JBoxContainer.ASYNC_JOB

    activation_settings = (
        ('MAX_ACTIVATIONS_PER_SEC', 'max_activations_per_sec'),
        ('MAX_AUTO_ACTIVATIONS_PER_RUN', 'max_activations_per_run'),
        ('ACTIVATION_SUBJECT', 'mail_subject'),
        ('ACTIVATION_BODY', 'mail_body'),
        ('ACTIVATION_SENDER', 'sender'),
    )
    for attr_name, cfg_key in activation_settings:
        setattr(JBoxd, attr_name, user_activation_cfg[cfg_key])
def launch_session(name, email, reuse=True):
    """
    Launch the container called `name` for `email`.

    Waits for the session backup of `name`, refreshes the disk use status and
    then launches the container by name, passing `reuse` through.
    JBoxd.finish_thread() is called on every exit path.
    """
    try:
        JBoxd._wait_for_session_backup(name)
        VolMgr.refresh_disk_use_status()
        launch_opts = {'reuse': reuse}
        JBoxContainer.launch_by_name(name, email, **launch_opts)
    finally:
        JBoxd.finish_thread()
def run(self):
    """
    Daemon main loop.

    On start-up the instance DNS entry is re-registered, performance counters
    are published, SIGINT/SIGTERM are routed to JBoxd.signal_handler, and a
    pending user home image update is applied (without fetching). The loop
    then polls the queue forever:

    - a ValueError while polling is logged and retried after 10 seconds;
    - an exception while processing offline requests is logged and the loop
      continues;
    - a request/response command is scheduled on a thread when the poll
      reports one.
    """
    Compute.deregister_instance_dns()
    Compute.register_instance_dns()
    JBoxd.publish_perf_counters()

    JBoxd.log_debug("Setting up signal handlers")
    signal.signal(signal.SIGINT, JBoxd.signal_handler)
    signal.signal(signal.SIGTERM, JBoxd.signal_handler)

    if VolMgr.has_update_for_user_home_image():
        VolMgr.update_user_home_image(fetch=False)

    while True:
        self.log_debug("JBox daemon waiting for commands...")
        try:
            offline, reply_req = JBoxd.QUEUE.poll(self._is_scheduled(JBoxAsyncJob.CMD_REQ_RESP, ()))
        except ValueError:
            self.log_exception("Exception reading command. Will retry after 10 seconds")
            time.sleep(10)
            continue

        if offline:
            try:
                self.process_offline()
            except Exception:
                # Catch Exception rather than everything, so SystemExit or
                # KeyboardInterrupt raised while processing is not swallowed
                # and the loop can actually be stopped.
                self.log_exception("Exception scheduling request")

        if reply_req:
            JBoxd.schedule_thread(JBoxAsyncJob.CMD_REQ_RESP, JBoxd.process_and_respond, ())
def maintain(max_timeout=0, inactive_timeout=0, protected_names=()):
    """
    Walk all docker containers, request backup and cleanup for those that ran
    or idled too long, then refresh the ping table, the valid container map,
    the published stats and the disk use status.

    :param max_timeout: seconds a container may run since its start time.
        When not positive, the cut-off is the minimum representable date.
    :param inactive_timeout: seconds allowed since the last recorded ping of
        an active container. When not positive, the cut-off is the minimum
        representable date.
    :param protected_names: container names that are never examined.
    """
    JBoxContainer.log_info("Starting container maintenance...")
    tnow = datetime.datetime.now(pytz.utc)
    tmin = datetime.datetime(datetime.MINYEAR, 1, 1, tzinfo=pytz.utc)

    stop_before = (tnow - datetime.timedelta(seconds=max_timeout)) if (max_timeout > 0) else tmin
    stop_inactive_before = (tnow - datetime.timedelta(seconds=inactive_timeout)) if (inactive_timeout > 0) else tmin

    all_containers = JBoxContainer.DCKR.containers(all=True)
    all_cnames = {}
    container_id_list = []
    for cdesc in all_containers:
        cid = cdesc['Id']
        cont = JBoxContainer(cid)
        container_id_list.append(cid)
        cname = cont.get_name()
        # Recorded before the skip check below, so unnamed and protected
        # containers are still part of the valid container map.
        all_cnames[cname] = cid

        if (cname is None) or (cname in protected_names):
            JBoxContainer.log_debug("Ignoring %s", cont.debug_str())
            continue

        c_is_active = cont.is_running() or cont.is_restarting()
        last_ping = JBoxContainer._get_last_ping(cname)

        # if we don't have a ping record, create one (we must have restarted)
        if (last_ping is None) and c_is_active:
            JBoxContainer.log_info("Discovered new container %s", cont.debug_str())
            JBoxContainer.record_ping(cname)

        start_time = cont.time_started()
        # check that start time is not absurdly small (indicates a container that's starting up)
        start_time_not_zero = (tnow - start_time).total_seconds() < (365 * 24 * 60 * 60)
        if (start_time < stop_before) and start_time_not_zero:
            # don't allow running beyond the limit for long running sessions
            JBoxContainer.log_info("Running beyond allowed time %s", cont.debug_str())
            cont.async_backup_and_cleanup()
        elif (last_ping is not None) and c_is_active and (last_ping < stop_inactive_before):
            # if inactive for too long, stop it
            JBoxContainer.log_info("Inactive beyond allowed time %s", cont.debug_str())
            cont.async_backup_and_cleanup()

    # delete ping entries for non existent containers; iterate over a snapshot
    # of the keys because entries are removed inside the loop
    for cname in list(JBoxContainer.PINGS.keys()):
        if cname not in all_cnames:
            del JBoxContainer.PINGS[cname]

    JBoxContainer.VALID_CONTAINERS = all_cnames
    JBoxContainer.publish_container_stats()
    VolMgr.refresh_disk_use_status(container_id_list=container_id_list)
    JBoxContainer.log_info("Finished container maintenance.")
def __init__(self):
    """
    Configure all subsystems from the configuration file, create the tornado
    application with its routes, and set up the periodic housekeeping
    callback.

    The parsed configuration is also stored on JBox.cfg. A fresh random
    cookie secret is generated for every process start.
    """
    dckr = docker.Client()
    cfg = JBox.cfg = read_config()
    cloud_cfg = cfg['cloud_host']

    LoggerMixin.setup_logger(level=cfg['root_log_level'])
    LoggerMixin.DEFAULT_LEVEL = cfg['jbox_log_level']

    JBoxHandler.configure(cfg)
    db.configure_db(cfg)

    CloudHost.configure(has_s3=cloud_cfg['s3'],
                        has_dynamodb=cloud_cfg['dynamodb'],
                        has_cloudwatch=cloud_cfg['cloudwatch'],
                        has_autoscale=cloud_cfg['autoscale'],
                        has_route53=cloud_cfg['route53'],
                        has_ebs=cloud_cfg['ebs'],
                        has_ses=cloud_cfg['ses'],
                        scale_up_at_load=cloud_cfg['scale_up_at_load'],
                        scale_up_policy=cloud_cfg['scale_up_policy'],
                        autoscale_group=cloud_cfg['autoscale_group'],
                        route53_domain=cloud_cfg['route53_domain'],
                        region=cloud_cfg['region'],
                        install_id=cloud_cfg['install_id'])

    VolMgr.configure(dckr, cfg)
    JBoxAsyncJob.configure(cfg)
    JBoxContainer.configure(dckr, cfg['docker_image'], cfg['mem_limit'], cfg['cpu_limit'],
                            cfg['numlocalmax'], cfg['async_job_ports'])

    self.application = tornado.web.Application([
        (r"/", MainHandler),
        (r"/hostlaunchipnb/", AuthHandler),
        (r"/hostadmin/", AdminHandler),
        (r"/ping/", PingHandler),
        (r"/cors/", CorsHandler)
    ])

    # The cookie secret must not be predictable, so draw it from the OS
    # entropy source instead of the default pseudo-random generator.
    secure_rng = random.SystemRandom()
    alphabet = string.ascii_uppercase + string.digits
    cookie_secret = ''.join(secure_rng.choice(alphabet) for _ in xrange(32))
    self.application.settings["cookie_secret"] = cookie_secret
    self.application.settings["google_oauth"] = cfg["google_oauth"]
    self.application.listen(cfg["port"])

    self.ioloop = tornado.ioloop.IOLoop.instance()

    # run container maintenance every 5 minutes
    run_interval = 5 * 60 * 1000
    self.log_info("Container maintenance every " + str(run_interval / (60 * 1000)) + " minutes")
    self.ct = tornado.ioloop.PeriodicCallback(JBox.do_housekeeping, run_interval, self.ioloop)
def __init__(self):
    """
    Configure all subsystems from the configuration file, create the tornado
    application (including the homework route), and set up the periodic
    housekeeping callback.

    The parsed configuration is also stored on JBox.cfg. A fresh random
    cookie secret is generated for every process start.
    """
    dckr = docker.Client()
    cfg = JBox.cfg = read_config()
    cloud_cfg = cfg['cloud_host']

    LoggerMixin.setup_logger(level=cfg['root_log_level'])
    LoggerMixin.DEFAULT_LEVEL = cfg['jbox_log_level']

    JBoxHandler.configure(cfg)
    db.configure_db(cfg)

    CloudHost.configure(has_s3=cloud_cfg['s3'],
                        has_dynamodb=cloud_cfg['dynamodb'],
                        has_cloudwatch=cloud_cfg['cloudwatch'],
                        has_autoscale=cloud_cfg['autoscale'],
                        has_route53=cloud_cfg['route53'],
                        has_ebs=cloud_cfg['ebs'],
                        has_ses=cloud_cfg['ses'],
                        scale_up_at_load=cloud_cfg['scale_up_at_load'],
                        scale_up_policy=cloud_cfg['scale_up_policy'],
                        autoscale_group=cloud_cfg['autoscale_group'],
                        route53_domain=cloud_cfg['route53_domain'],
                        region=cloud_cfg['region'],
                        install_id=cloud_cfg['install_id'])

    VolMgr.configure(dckr, cfg)
    JBoxAsyncJob.configure(cfg)
    JBoxContainer.configure(dckr, cfg['docker_image'], cfg['mem_limit'], cfg['cpu_limit'],
                            cfg['numlocalmax'], cfg['async_job_ports'])

    self.application = tornado.web.Application([
        (r"/", MainHandler),
        (r"/hostlaunchipnb/", AuthHandler),
        (r"/hostadmin/", AdminHandler),
        (r"/ping/", PingHandler),
        (r"/cors/", CorsHandler),
        (r"/hw/", HomeworkHandler)
    ])

    # The cookie secret must not be predictable, so draw it from the OS
    # entropy source instead of the default pseudo-random generator.
    secure_rng = random.SystemRandom()
    alphabet = string.ascii_uppercase + string.digits
    cookie_secret = ''.join(secure_rng.choice(alphabet) for _ in xrange(32))
    self.application.settings["cookie_secret"] = cookie_secret
    self.application.settings["google_oauth"] = cfg["google_oauth"]
    self.application.listen(cfg["port"])

    self.ioloop = tornado.ioloop.IOLoop.instance()

    # run container maintenance every 5 minutes
    run_interval = 5 * 60 * 1000
    self.log_info("Container maintenance every " + str(run_interval / (60 * 1000)) + " minutes")
    self.ct = tornado.ioloop.PeriodicCallback(JBox.do_housekeeping, run_interval, self.ioloop)
def start(self, email):
    """
    Start this container with the disk of user `email` bound read-write to
    the first configured container volume.

    Nothing is started when the container is already running or restarting;
    a warning is logged instead. After a successful start a ping is recorded
    for the container name, when it has one.
    """
    self.refresh()
    JBoxContainer.log_info("Starting %s", self.debug_str())

    already_up = self.is_running() or self.is_restarting()
    if already_up:
        JBoxContainer.log_warn("Already started %s. Browser connectivity issues?", self.debug_str())
        return

    user_disk = VolMgr.get_disk_for_user(email)
    volume_binds = {
        user_disk.disk_path: {'bind': JBoxContainer.VOLUMES[0], 'ro': False},
    }
    JBoxContainer.DCKR.start(self.dockid,
                             port_bindings=JBoxContainer.CONTAINER_PORT_BINDINGS,
                             binds=volume_binds)

    self.refresh()
    JBoxContainer.log_info("Started %s", self.debug_str())

    container_name = self.get_name()
    if container_name is None:
        return
    JBoxContainer.record_ping(container_name)
def publish_perf_counters():
    """
    Publish performance counters. Used for status monitoring and auto scaling.

    After refreshing the disk use status, this collects the active container
    counts (interactive and API), CPU use (averaged with the previous
    sample), memory use, disk use (growth since the first reading taken by
    this process), container slot use for both container kinds, disk id use
    and an overall "Load" value that is the maximum of all the percentages.
    Everything is sent in a single Compute.publish_stats_multi call.
    """
    VolMgr.refresh_disk_use_status()

    stats = []
    nactive = BaseContainer.num_active(BaseContainer.SFX_INT)
    stats.append(("NumActiveContainers", "Count", nactive))

    nactive_api = BaseContainer.num_active(BaseContainer.SFX_API)
    stats.append(("NumActiveAPIContainers", "Count", nactive_api))

    # Smooth the CPU reading by averaging it with the previously stored sample.
    curr_cpu_used_pct = psutil.cpu_percent()
    if BaseContainer.LAST_CPU_PCT is None:
        last_cpu_used_pct = curr_cpu_used_pct
    else:
        last_cpu_used_pct = BaseContainer.LAST_CPU_PCT
    BaseContainer.LAST_CPU_PCT = curr_cpu_used_pct
    cpu_used_pct = int((curr_cpu_used_pct + last_cpu_used_pct) / 2)
    stats.append(("CPUUsed", "Percent", cpu_used_pct))

    mem_used_pct = psutil.virtual_memory().percent
    stats.append(("MemUsed", "Percent", mem_used_pct))

    # Take the fullest partition, skipping paths the volume manager owns.
    disk_used_pct = 0
    for part in psutil.disk_partitions():
        if not VolMgr.is_mount_path(part.mountpoint):
            try:
                disk_used_pct = max(psutil.disk_usage(part.mountpoint).percent, disk_used_pct)
            except Exception:
                # Best effort: a partition that cannot be queried is skipped.
                # Only Exception is caught so interpreter exit and keyboard
                # interrupts are not swallowed.
                pass
    # Report disk use relative to the first value seen by this process.
    if BaseContainer.INITIAL_DISK_USED_PCT is None:
        BaseContainer.INITIAL_DISK_USED_PCT = disk_used_pct
    disk_used_pct = max(0, (disk_used_pct - BaseContainer.INITIAL_DISK_USED_PCT))
    stats.append(("DiskUsed", "Percent", disk_used_pct))

    cont_load_pct = min(100, max(0, nactive * 100 / SessContainer.MAX_CONTAINERS))
    stats.append(("ContainersUsed", "Percent", cont_load_pct))

    api_cont_load_pct = min(100, max(0, nactive_api * 100 / APIContainer.MAX_CONTAINERS))
    stats.append(("APIContainersUsed", "Percent", api_cont_load_pct))

    # Sample once so the published value and the one folded into "Load" agree.
    disk_ids_used_pct = VolMgr.used_pct()
    stats.append(("DiskIdsUsed", "Percent", disk_ids_used_pct))

    overall_load_pct = max(cont_load_pct, api_cont_load_pct, disk_used_pct,
                           mem_used_pct, cpu_used_pct, disk_ids_used_pct)
    stats.append(("Load", "Percent", overall_load_pct))

    Compute.publish_stats_multi(stats)
def __init__(self):
    """
    Set up the daemon from the configuration file: logging, database, cloud
    host layer, volume manager, async job layer and the container layer (in
    MODE_SUB), then publish the job queue and the user activation settings
    as JBoxd class attributes.
    """
    dckr = docker.Client()
    cfg = read_config()
    cloud_cfg = cfg['cloud_host']
    user_activation_cfg = cfg['user_activation']

    LoggerMixin.setup_logger(level=cfg['root_log_level'])
    LoggerMixin.DEFAULT_LEVEL = cfg['jbox_log_level']

    db.configure_db(cfg)

    # Feature switches map to has_<name>; the rest are passed through as is.
    cloud_opts = {}
    for feature in ('s3', 'dynamodb', 'cloudwatch', 'autoscale', 'route53', 'ebs', 'ses'):
        cloud_opts['has_' + feature] = cloud_cfg[feature]
    for key in ('scale_up_at_load', 'scale_up_policy', 'autoscale_group',
                'route53_domain', 'region', 'install_id'):
        cloud_opts[key] = cloud_cfg[key]
    CloudHost.configure(**cloud_opts)

    VolMgr.configure(dckr, cfg)
    JBoxAsyncJob.configure(cfg)
    JBoxContainer.configure(dckr,
                            cfg['docker_image'],
                            cfg['mem_limit'],
                            cfg['cpu_limit'],
                            cfg['numlocalmax'],
                            cfg['async_job_ports'],
                            async_mode=JBoxAsyncJob.MODE_SUB)
    self.log_debug("Backup daemon listening on ports: %s", repr(cfg['async_job_ports']))
    JBoxd.QUEUE = JBoxContainer.ASYNC_JOB

    activation_settings = (
        ('MAX_ACTIVATIONS_PER_SEC', 'max_activations_per_sec'),
        ('MAX_AUTO_ACTIVATIONS_PER_RUN', 'max_activations_per_run'),
        ('ACTIVATION_SUBJECT', 'mail_subject'),
        ('ACTIVATION_BODY', 'mail_body'),
        ('ACTIVATION_SENDER', 'sender'),
    )
    for attr_name, cfg_key in activation_settings:
        setattr(JBoxd, attr_name, user_activation_cfg[cfg_key])
def publish_perf_counters():
    """
    Publish performance counters. Used for status monitoring and auto scaling.

    The disk use status is refreshed first. The counters are then gathered
    and sent with one Compute.publish_stats_multi call: active container
    counts (interactive and API), CPU use averaged with the previous sample,
    memory use, disk use growth since the first reading of this process,
    container slot use for both container kinds, disk id use, and "Load",
    the maximum of all the percentages.
    """
    VolMgr.refresh_disk_use_status()

    nactive = BaseContainer.num_active(BaseContainer.SFX_INT)
    nactive_api = BaseContainer.num_active(BaseContainer.SFX_API)

    # Average the current CPU reading with the one stored by the last call.
    curr_cpu_used_pct = psutil.cpu_percent()
    last_cpu_used_pct = BaseContainer.LAST_CPU_PCT
    if last_cpu_used_pct is None:
        last_cpu_used_pct = curr_cpu_used_pct
    BaseContainer.LAST_CPU_PCT = curr_cpu_used_pct
    cpu_used_pct = int((curr_cpu_used_pct + last_cpu_used_pct) / 2)

    mem_used_pct = psutil.virtual_memory().percent

    # Fullest partition among those not managed by the volume manager.
    disk_used_pct = 0
    for part in psutil.disk_partitions():
        if VolMgr.is_mount_path(part.mountpoint):
            continue
        try:
            disk_used_pct = max(psutil.disk_usage(part.mountpoint).percent, disk_used_pct)
        except Exception:
            # Best effort: skip partitions that cannot be queried, but let
            # SystemExit and KeyboardInterrupt propagate.
            pass
    # Disk use is reported relative to the first value seen by this process.
    if BaseContainer.INITIAL_DISK_USED_PCT is None:
        BaseContainer.INITIAL_DISK_USED_PCT = disk_used_pct
    disk_used_pct = max(0, (disk_used_pct - BaseContainer.INITIAL_DISK_USED_PCT))

    cont_load_pct = min(100, max(0, nactive * 100 / SessContainer.MAX_CONTAINERS))
    api_cont_load_pct = min(100, max(0, nactive_api * 100 / APIContainer.MAX_CONTAINERS))

    # Sampled once so "DiskIdsUsed" and "Load" are based on the same value.
    disk_ids_used_pct = VolMgr.used_pct()

    overall_load_pct = max(cont_load_pct, api_cont_load_pct, disk_used_pct,
                           mem_used_pct, cpu_used_pct, disk_ids_used_pct)

    stats = [
        ("NumActiveContainers", "Count", nactive),
        ("NumActiveAPIContainers", "Count", nactive_api),
        ("CPUUsed", "Percent", cpu_used_pct),
        ("MemUsed", "Percent", mem_used_pct),
        ("DiskUsed", "Percent", disk_used_pct),
        ("ContainersUsed", "Percent", cont_load_pct),
        ("APIContainersUsed", "Percent", api_cont_load_pct),
        ("DiskIdsUsed", "Percent", disk_ids_used_pct),
        ("Load", "Percent", overall_load_pct),
    ]
    Compute.publish_stats_multi(stats)
def __init__(self):
    """
    Configure the subsystems, initialise the async job layer in MODE_SUB and
    expose the job queue and the user activation settings as JBoxd class
    attributes.
    """
    # Subsystems are configured in this fixed order.
    for component in (LoggerMixin, db, Compute, SessContainer, APIContainer, VolMgr, JBoxAsyncJob):
        component.configure()
    JBoxAsyncJob.init(JBoxAsyncJob.MODE_SUB)

    manager_ports = JBoxCfg.get('container_manager_ports')
    self.log_debug("Container manager listening on ports: %s", repr(manager_ports))

    JBoxd.QUEUE = JBoxAsyncJob.get()

    activation_settings = (
        ('MAX_ACTIVATIONS_PER_SEC', 'user_activation.max_activations_per_sec'),
        ('MAX_AUTO_ACTIVATIONS_PER_RUN', 'user_activation.max_activations_per_run'),
        ('ACTIVATION_SUBJECT', 'user_activation.mail_subject'),
        ('ACTIVATION_BODY', 'user_activation.mail_body'),
        ('ACTIVATION_SENDER', 'user_activation.sender'),
    )
    for attr_name, cfg_key in activation_settings:
        setattr(JBoxd, attr_name, JBoxCfg.get(cfg_key))
def __init__(self):
    """
    Configure the subsystems, build the tornado application with its routes
    and plugin handlers, bind it to the manager port, create the periodic
    housekeeping and signal callbacks, and set the defaults of the async
    HTTP client.
    """
    # Subsystems are configured in this fixed order.
    for component in (LoggerMixin, db, Compute, SessContainer, VolMgr, JBoxAsyncJob):
        component.configure()
    JBoxAsyncJob.init(JBoxAsyncJob.MODE_PUB)

    routes = [
        (r"/", MainHandler),
        (r"/jboxadmin/", AdminHandler),
        (r"/jboxping/", PingHandler),
        (r"/jboxcors/", CorsHandler),
    ]
    self.application = tornado.web.Application(handlers=routes)
    JBPluginHandler.add_plugin_handlers(self.application)
    JBPluginUI.create_include_files()

    # use sesskey as cookie secret to be able to span multiple tornado servers
    settings = self.application.settings
    settings["cookie_secret"] = JBoxCfg.get('sesskey')
    settings["plugin_features"] = JBox.get_pluggedin_features()

    # Listen on the host name first, then on localhost, on the same port.
    for address in (socket.gethostname(), 'localhost'):
        self.application.listen(JBoxCfg.get('interactive.manager_port'), address=address)

    self.ioloop = tornado.ioloop.IOLoop.instance()

    # run container maintenance every 5 minutes
    run_interval = 5 * 60 * 1000
    interval_minutes = run_interval / (60 * 1000)
    self.log_info("Container maintenance every " + str(interval_minutes) + " minutes")
    self.ct = tornado.ioloop.PeriodicCallback(JBox.do_housekeeping, run_interval, self.ioloop)
    self.sigct = tornado.ioloop.PeriodicCallback(JBox.do_signals, 1000, self.ioloop)

    # or configure cacerts
    # NOTE(review): validate_cert=None looks like it turns certificate
    # validation off for outgoing requests - confirm this is intended.
    http_defaults = dict(validate_cert=None)
    AsyncHTTPClient.configure(None, defaults=http_defaults)
def run(self):
    """
    Daemon main loop.

    A pending user home image update is applied first (without fetching).
    The loop then polls the queue forever:

    - a ValueError while polling is logged and retried after 10 seconds;
    - an exception while processing offline requests is logged and the loop
      continues;
    - a request/response command is scheduled on a thread when the poll
      reports one.
    """
    if VolMgr.has_update_for_user_home_image():
        VolMgr.update_user_home_image(fetch=False)

    while True:
        self.log_debug("JBox daemon waiting for commands...")
        try:
            offline, reply_req = JBoxd.QUEUE.poll(self._is_scheduled(JBoxAsyncJob.CMD_REQ_RESP, ()))
        except ValueError:
            self.log_exception("Exception reading command. Will retry after 10 seconds")
            time.sleep(10)
            continue

        if offline:
            try:
                self.process_offline()
            except Exception:
                # Catch Exception rather than everything, so SystemExit or
                # KeyboardInterrupt raised while processing is not swallowed
                # and the loop can actually be stopped.
                self.log_exception("Exception scheduling request")

        if reply_req:
            JBoxd.schedule_thread(JBoxAsyncJob.CMD_REQ_RESP, JBoxd.process_and_respond, ())
def delete(self, backup=False):
    """
    Remove this container from docker.

    The container is killed first when it is running or restarting. A disk
    attached to it is released, with `backup` passed through to the release
    call. Its ping entry is dropped when the container has a name.
    """
    JBoxContainer.log_info("Deleting %s", self.debug_str())
    self.refresh()
    container_name = self.get_name()

    still_active = self.is_running() or self.is_restarting()
    if still_active:
        self.kill()

    attached_disk = VolMgr.get_disk_from_container(self.dockid)
    if attached_disk is not None:
        attached_disk.release(backup=backup)

    if container_name is not None:
        JBoxContainer.PINGS.pop(container_name, None)

    JBoxContainer.DCKR.remove_container(self.dockid)
    JBoxContainer.log_info("Deleted %s", self.debug_str())
def init():
    """
    Bootstrap logging, the cloud host layer and the volume manager from the
    configuration file.

    The cloud host 'backup_bucket' setting is replaced by a fixed bucket
    name, and S3 support is enabled unconditionally instead of following the
    configured 's3' flag.
    """
    dckr = docker.Client()
    cfg = read_config()

    cloud_cfg = cfg['cloud_host']
    cloud_cfg['backup_bucket'] = "juliabox_userbackup"

    LoggerMixin.setup_logger(level=cfg['root_log_level'])
    LoggerMixin.DEFAULT_LEVEL = cfg['jbox_log_level']

    feature_flags = ('dynamodb', 'cloudwatch', 'autoscale', 'route53', 'ebs', 'ses')
    passthrough_keys = ('scale_up_at_load', 'scale_up_policy', 'autoscale_group',
                        'route53_domain', 'region', 'install_id')

    # has_s3 is hard-wired to True; cloud_cfg['s3'] is not read.
    configure_kwargs = dict(has_s3=True)
    for flag in feature_flags:
        configure_kwargs['has_' + flag] = cloud_cfg[flag]
    for name in passthrough_keys:
        configure_kwargs[name] = cloud_cfg[name]
    CloudHost.configure(**configure_kwargs)

    VolMgr.configure(dckr, cfg)
def publish_container_stats():
    """
    Publish custom cloudwatch statistics. Used for status monitoring and auto scaling.

    The published metrics are the active container count, memory use, disk
    use growth since the first reading of this process, container slot use,
    disk id use, and "Load", the maximum of all the percentages. CPU use is
    averaged with the previous sample and only feeds into "Load".
    """
    nactive = JBoxContainer.num_active()
    CloudHost.publish_stats("NumActiveContainers", "Count", nactive)

    # Average the current CPU reading with the one stored by the last call.
    curr_cpu_used_pct = psutil.cpu_percent()
    last_cpu_used_pct = JBoxContainer.LAST_CPU_PCT
    if last_cpu_used_pct is None:
        last_cpu_used_pct = curr_cpu_used_pct
    JBoxContainer.LAST_CPU_PCT = curr_cpu_used_pct
    cpu_used_pct = int((curr_cpu_used_pct + last_cpu_used_pct) / 2)

    mem_used_pct = psutil.virtual_memory().percent
    CloudHost.publish_stats("MemUsed", "Percent", mem_used_pct)

    # Fullest partition among those not managed by the volume manager.
    disk_used_pct = 0
    for part in psutil.disk_partitions():
        if VolMgr.is_mount_path(part.mountpoint):
            continue
        try:
            disk_used_pct = max(psutil.disk_usage(part.mountpoint).percent, disk_used_pct)
        except Exception:
            # Best effort: skip partitions that cannot be queried, but let
            # SystemExit and KeyboardInterrupt propagate.
            pass
    # Disk use is reported relative to the first value seen by this process.
    if JBoxContainer.INITIAL_DISK_USED_PCT is None:
        JBoxContainer.INITIAL_DISK_USED_PCT = disk_used_pct
    disk_used_pct = max(0, (disk_used_pct - JBoxContainer.INITIAL_DISK_USED_PCT))
    CloudHost.publish_stats("DiskUsed", "Percent", disk_used_pct)

    cont_load_pct = min(100, max(0, nactive * 100 / JBoxContainer.MAX_CONTAINERS))
    CloudHost.publish_stats("ContainersUsed", "Percent", cont_load_pct)

    # Sampled once so "DiskIdsUsed" and "Load" are based on the same value.
    disk_ids_used_pct = VolMgr.used_pct()
    CloudHost.publish_stats("DiskIdsUsed", "Percent", disk_ids_used_pct)

    overall_load_pct = max(cont_load_pct, disk_used_pct, mem_used_pct, cpu_used_pct, disk_ids_used_pct)
    CloudHost.publish_stats("Load", "Percent", overall_load_pct)
def start(self, email):
    """
    Start this container, binding the disk of user `email` read-write to the
    first configured container volume.

    When the container is already running or restarting this only logs the
    fact and returns. Otherwise the container is started and, when it has a
    name, a ping is recorded for it.
    """
    self.refresh()
    JBoxContainer.log_info("Starting %s", self.debug_str())

    if self.is_running() or self.is_restarting():
        JBoxContainer.log_info("Already started %s", self.debug_str())
        return

    user_disk = VolMgr.get_disk_for_user(email)
    mount_spec = {'bind': JBoxContainer.VOLUMES[0], 'ro': False}
    start_opts = {
        'port_bindings': JBoxContainer.CONTAINER_PORT_BINDINGS,
        'binds': {user_disk.disk_path: mount_spec},
    }
    JBoxContainer.DCKR.start(self.dockid, **start_opts)

    self.refresh()
    JBoxContainer.log_info("Started %s", self.debug_str())

    container_name = self.get_name()
    if container_name is not None:
        JBoxContainer.record_ping(container_name)
def get_disk_allocated(self):
    """
    Return the allocated size reported by the disk attached to this
    container, or 0 when the volume manager finds no disk for it.
    """
    attached_disk = VolMgr.get_disk_from_container(self.dockid)
    if attached_disk is None:
        return 0
    return attached_disk.get_disk_allocated_size()
def refresh_disks():
    """
    Refresh the user home image through the volume manager, unless an update
    of that image is already scheduled.
    """
    update_pending = JBoxd._is_scheduled(JBoxAsyncJob.CMD_UPDATE_USER_HOME_IMAGE, ())
    if not update_pending:
        VolMgr.refresh_user_home_image()
def publish_container_stats():
    """
    Publish volume manager and database statistics, then record the
    collection date against the install id reported by Compute.
    """
    for publisher in (VolMgr, db):
        publisher.publish_stats()
    install_id = Compute.get_install_id()
    JBoxDynConfig.set_stat_collected_date(install_id)
def launch_session(name, email, reuse=True):
    """
    Launch the session container called `name` for `email` and publish fresh
    performance counters afterwards.

    The call first waits for the session backup of `name` and refreshes the
    disk use status. `reuse` is passed through to the launch call.
    """
    JBoxd._wait_for_session_backup(name)
    VolMgr.refresh_disk_use_status()
    launch_opts = {'reuse': reuse}
    SessContainer.launch_by_name(name, email, **launch_opts)
    JBoxd.publish_perf_counters()
def update_user_home_image():
    """
    Update the user home image with fetching enabled, then refresh it
    through the volume manager.
    """
    update_opts = {'fetch': True}
    VolMgr.update_user_home_image(**update_opts)
    VolMgr.refresh_user_home_image()
def launch_session(name, email, reuse=True):
    """
    Launch the session called `name` for `email`.

    The anticipated load for `name` is published before anything else. The
    call then waits for the session backup, refreshes the disk use status
    and delegates to JBoxd._launch_session.
    """
    preparation_steps = (
        lambda: JBoxd.publish_anticipated_load(name),
        lambda: JBoxd._wait_for_session_backup(name),
        VolMgr.refresh_disk_use_status,
    )
    for step in preparation_steps:
        step()
    JBoxd._launch_session(name, email, reuse)
def update_user_home_image():
    """
    Update the user home image with fetching enabled, then refresh all
    loopback disks.

    JBoxd.finish_thread() is called on every exit path.
    """
    try:
        update_opts = {'fetch': True}
        VolMgr.update_user_home_image(**update_opts)
        JBoxLoopbackVol.refresh_all_disks()
    finally:
        JBoxd.finish_thread()
def maintain(max_timeout=0, inactive_timeout=0, protected_names=()):
    """
    Walk all docker containers, request backup and cleanup for those that ran
    or idled too long, then refresh the ping table, the valid container map,
    the published stats and the disk use status.

    :param max_timeout: seconds a container may run since its start time.
        When not positive, the cut-off is the minimum representable date.
    :param inactive_timeout: seconds allowed since the last recorded ping of
        an active container. When not positive, the cut-off is the minimum
        representable date.
    :param protected_names: container names that are never examined.
    """
    JBoxContainer.log_info("Starting container maintenance...")
    tnow = datetime.datetime.now(pytz.utc)
    tmin = datetime.datetime(datetime.MINYEAR, 1, 1, tzinfo=pytz.utc)

    if max_timeout > 0:
        stop_before = tnow - datetime.timedelta(seconds=max_timeout)
    else:
        stop_before = tmin
    if inactive_timeout > 0:
        stop_inactive_before = tnow - datetime.timedelta(seconds=inactive_timeout)
    else:
        stop_inactive_before = tmin

    all_containers = JBoxContainer.DCKR.containers(all=True)
    all_cnames = {}
    container_id_list = []
    for cdesc in all_containers:
        cid = cdesc['Id']
        cont = JBoxContainer(cid)
        container_id_list.append(cid)
        cname = cont.get_name()
        # Recorded before the skip check below, so unnamed and protected
        # containers are still part of the valid container map.
        all_cnames[cname] = cid

        if (cname is None) or (cname in protected_names):
            JBoxContainer.log_debug("Ignoring %s", cont.debug_str())
            continue

        c_is_active = cont.is_running() or cont.is_restarting()
        last_ping = JBoxContainer._get_last_ping(cname)

        # if we don't have a ping record, create one (we must have restarted)
        if (last_ping is None) and c_is_active:
            JBoxContainer.log_info("Discovered new container %s", cont.debug_str())
            JBoxContainer.record_ping(cname)

        start_time = cont.time_started()
        # check that start time is not absurdly small (indicates a container that's starting up)
        start_time_not_zero = (tnow - start_time).total_seconds() < (365 * 24 * 60 * 60)
        if (start_time < stop_before) and start_time_not_zero:
            # don't allow running beyond the limit for long running sessions
            JBoxContainer.log_warn("Running beyond allowed time %s", cont.debug_str())
            cont.async_backup_and_cleanup()
        elif (last_ping is not None) and c_is_active and (last_ping < stop_inactive_before):
            # if inactive for too long, stop it
            JBoxContainer.log_warn("Inactive beyond allowed time %s", cont.debug_str())
            cont.async_backup_and_cleanup()

    # delete ping entries for non existent containers; iterate over a snapshot
    # of the keys because entries are removed inside the loop
    for cname in list(JBoxContainer.PINGS.keys()):
        if cname not in all_cnames:
            del JBoxContainer.PINGS[cname]

    JBoxContainer.VALID_CONTAINERS = all_cnames
    JBoxContainer.publish_container_stats()
    VolMgr.refresh_disk_use_status(container_id_list=container_id_list)
    JBoxContainer.log_info("Finished container maintenance.")
def collect_stats():
    """
    Publish volume manager and database statistics, then record the
    collection date against this installation's id.
    """
    for publisher in (VolMgr, db):
        publisher.publish_stats()
    install_id = CloudHost.INSTALL_ID
    JBoxDynConfig.set_stat_collected_date(install_id)
def do_update_user_home_image():
    """
    Apply a pending user home image update without fetching. When that
    update call returns a false value, request an asynchronous update
    through JBoxContainer instead.

    Nothing happens when the volume manager reports no pending update.
    """
    if not VolMgr.has_update_for_user_home_image():
        return
    updated = VolMgr.update_user_home_image(fetch=False)
    if not updated:
        JBoxContainer.async_update_user_home_image()
def update_user_home_image():
    """
    Update the user home image with fetching enabled, then refresh all
    loopback disks.
    """
    update_opts = {'fetch': True}
    VolMgr.update_user_home_image(**update_opts)
    JBoxLoopbackVol.refresh_all_disks()
# Configure the cloud host layer and the volume manager from the loaded
# configuration (cloud_cfg, dckr and cfg are defined before this point).
cloud_opts = {}
for feature in ('s3', 'dynamodb', 'cloudwatch', 'autoscale', 'route53', 'ebs', 'ses'):
    cloud_opts['has_' + feature] = cloud_cfg[feature]
for key in ('scale_up_at_load', 'scale_up_policy', 'autoscale_group',
            'route53_domain', 'region', 'install_id'):
    cloud_opts[key] = cloud_cfg[key]
CloudHost.configure(**cloud_opts)
VolMgr.configure(dckr, cfg)

# Copy the current user home image to a file named after its timestamp,
# placed in the same directory as the image.
ts = JBoxVol._get_user_home_timestamp()
VolMgr.log_debug("user_home_timestamp: %s", ts.strftime("%Y%m%d_%H%M"))

img_dir = os.path.split(JBoxVol.USER_HOME_IMG)[0]
new_img_file = os.path.join(img_dir, 'user_home_' + ts.strftime("%Y%m%d_%H%M") + '.tar.gz')
shutil.copyfile(JBoxVol.USER_HOME_IMG, new_img_file)
VolMgr.log_debug("new image file is at : %s", new_img_file)

# Push the timestamped copy to the templates bucket.
bucket = 'juliabox-user-home-templates'
VolMgr.log_debug("pushing new image file to s3 at: %s", bucket)
CloudHost.push_file_to_s3(bucket, new_img_file)