Example #1
    def publish_container_stats():
        """ Publish custom cloudwatch statistics. Used for status monitoring and auto scaling. """
        nactive = JBoxContainer.num_active()
        CloudHost.publish_stats("NumActiveContainers", "Count", nactive)

        curr_cpu_used_pct = psutil.cpu_percent()
        last_cpu_used_pct = curr_cpu_used_pct if JBoxContainer.LAST_CPU_PCT is None else JBoxContainer.LAST_CPU_PCT
        JBoxContainer.LAST_CPU_PCT = curr_cpu_used_pct
        cpu_used_pct = int((curr_cpu_used_pct + last_cpu_used_pct)/2)

        mem_used_pct = psutil.virtual_memory().percent
        CloudHost.publish_stats("MemUsed", "Percent", mem_used_pct)

        disk_used_pct = 0
        for x in psutil.disk_partitions():
            if not VolMgr.is_mount_path(x.mountpoint):
                try:
                    disk_used_pct = max(psutil.disk_usage(x.mountpoint).percent, disk_used_pct)
                except:
                    pass
        if JBoxContainer.INITIAL_DISK_USED_PCT is None:
            JBoxContainer.INITIAL_DISK_USED_PCT = disk_used_pct
        disk_used_pct = max(0, (disk_used_pct - JBoxContainer.INITIAL_DISK_USED_PCT))
        CloudHost.publish_stats("DiskUsed", "Percent", disk_used_pct)

        cont_load_pct = min(100, max(0, nactive * 100 / JBoxContainer.MAX_CONTAINERS))
        CloudHost.publish_stats("ContainersUsed", "Percent", cont_load_pct)

        CloudHost.publish_stats("DiskIdsUsed", "Percent", VolMgr.used_pct())

        overall_load_pct = max(cont_load_pct, disk_used_pct, mem_used_pct, cpu_used_pct, VolMgr.used_pct())
        CloudHost.publish_stats("Load", "Percent", overall_load_pct)
Example #2
 def collect_stats():
     try:
         VolMgr.publish_stats()
         db.publish_stats()
         JBoxDynConfig.set_stat_collected_date(CloudHost.INSTALL_ID)
     finally:
         JBoxd.finish_thread()
Example #3
 def collect_stats():
     try:
         VolMgr.publish_stats()
         db.publish_stats()
         JBoxDynConfig.set_stat_collected_date(CloudHost.INSTALL_ID)
     finally:
         JBoxd.finish_thread()
Example #4
    def init():
        dckr = docker.Client()
        cfg = read_config()
        cloud_cfg = cfg['cloud_host']
        cloud_cfg['backup_bucket'] = "juliabox_userbackup"

        LoggerMixin.setup_logger(level=cfg['root_log_level'])
        LoggerMixin.DEFAULT_LEVEL = cfg['jbox_log_level']

        CloudHost.configure(
            has_s3=True,  #cloud_cfg['s3'],
            has_dynamodb=cloud_cfg['dynamodb'],
            has_cloudwatch=cloud_cfg['cloudwatch'],
            has_autoscale=cloud_cfg['autoscale'],
            has_route53=cloud_cfg['route53'],
            has_ebs=cloud_cfg['ebs'],
            has_ses=cloud_cfg['ses'],
            scale_up_at_load=cloud_cfg['scale_up_at_load'],
            scale_up_policy=cloud_cfg['scale_up_policy'],
            autoscale_group=cloud_cfg['autoscale_group'],
            route53_domain=cloud_cfg['route53_domain'],
            region=cloud_cfg['region'],
            install_id=cloud_cfg['install_id'])

        VolMgr.configure(dckr, cfg)
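
For orientation, the configuration that read_config() is assumed to return must at least cover the keys consumed above. A sketch of its shape with placeholder values (only the key names come from the snippet):

cfg = {
    'root_log_level': 'INFO',   # placeholder
    'jbox_log_level': 'DEBUG',  # placeholder
    'cloud_host': {
        's3': True,
        'dynamodb': True,
        'cloudwatch': True,
        'autoscale': False,
        'route53': False,
        'ebs': True,
        'ses': False,
        'scale_up_at_load': 80,            # placeholder percentage
        'scale_up_policy': 'addinstance',  # placeholder
        'autoscale_group': 'juliabox',     # placeholder
        'route53_domain': 'example.org',   # placeholder
        'region': 'us-east-1',             # placeholder
        'install_id': 'JuliaBox'           # placeholder
    }
}
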
Example #5
    def __init__(self):
        LoggerMixin.configure()
        db.configure()
        Compute.configure()
        SessContainer.configure()
        VolMgr.configure()

        JBoxAsyncJob.configure()
        JBoxAsyncJob.init(JBoxAsyncJob.MODE_PUB)

        self.application = tornado.web.Application(handlers=[
            (r"/", MainHandler),
            (r"/jboxadmin/", AdminHandler),
            (r"/jboxping/", PingHandler),
            (r"/jboxcors/", CorsHandler)
        ])
        JBPluginHandler.add_plugin_handlers(self.application)
        JBPluginUI.create_include_files()

        # cookie_secret = ''.join(random.choice(string.ascii_uppercase + string.digits) for x in xrange(32))
        # use sesskey as cookie secret to be able to span multiple tornado servers
        self.application.settings["cookie_secret"] = JBoxCfg.get('sesskey')
        self.application.settings["plugin_features"] = JBox.get_pluggedin_features()
        self.application.listen(JBoxCfg.get('interactive.manager_port'), address=socket.gethostname())
        self.application.listen(JBoxCfg.get('interactive.manager_port'), address='localhost')

        self.ioloop = tornado.ioloop.IOLoop.instance()

        # run container maintenance every 5 minutes
        run_interval = 5 * 60 * 1000
        self.log_info("Container maintenance every " + str(run_interval / (60 * 1000)) + " minutes")
        self.ct = tornado.ioloop.PeriodicCallback(JBox.do_housekeeping, run_interval, self.ioloop)
        self.sigct = tornado.ioloop.PeriodicCallback(JBox.do_signals, 1000, self.ioloop)
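
The constructor only wires things up; nothing here starts the loop. A hypothetical companion method (the name run_forever is not from the project) would start the periodic callbacks and then block in the IOLoop:

    def run_forever(self):
        # Hypothetical sketch: start the housekeeping and signal timers, then block.
        self.ct.start()
        self.sigct.start()
        self.ioloop.start()
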
Example #6
    def run(self):
        if VolMgr.has_update_for_user_home_image():
            VolMgr.update_user_home_image(fetch=False)

        while True:
            self.log_debug("JBox daemon waiting for commands...")
            cmd, data = self.queue.recv()

            if cmd == JBoxAsyncJob.CMD_BACKUP_CLEANUP:
                args = (data,)
                fn = JBoxd.backup_and_cleanup
            elif cmd == JBoxAsyncJob.CMD_LAUNCH_SESSION:
                args = (data[0], data[1], data[2])
                fn = JBoxd.launch_session
            elif cmd == JBoxAsyncJob.CMD_AUTO_ACTIVATE:
                args = ()
                fn = JBoxd.auto_activate
            elif cmd == JBoxAsyncJob.CMD_UPDATE_USER_HOME_IMAGE:
                args = ()
                fn = JBoxd.update_user_home_image
            elif cmd == JBoxAsyncJob.CMD_REFRESH_DISKS:
                args = ()
                fn = JBoxd.refresh_disks
            elif cmd == JBoxAsyncJob.CMD_COLLECT_STATS:
                args = ()
                fn = JBoxd.collect_stats
            else:
                self.log_error("Unknown command " + str(cmd))
                continue

            JBoxd.schedule_thread(cmd, fn, args)
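
The if/elif chain is a straightforward command router. The same routing can be expressed as a dispatch table; the following is a sketch only (run_with_dispatch is not part of the project), mirroring the argument shapes used above:

    def run_with_dispatch(self):
        # Sketch: command -> (handler, argument builder), equivalent to the chain in run().
        dispatch = {
            JBoxAsyncJob.CMD_BACKUP_CLEANUP: (JBoxd.backup_and_cleanup, lambda data: (data,)),
            JBoxAsyncJob.CMD_LAUNCH_SESSION: (JBoxd.launch_session, lambda data: (data[0], data[1], data[2])),
            JBoxAsyncJob.CMD_AUTO_ACTIVATE: (JBoxd.auto_activate, lambda data: ()),
            JBoxAsyncJob.CMD_UPDATE_USER_HOME_IMAGE: (JBoxd.update_user_home_image, lambda data: ()),
            JBoxAsyncJob.CMD_REFRESH_DISKS: (JBoxd.refresh_disks, lambda data: ()),
            JBoxAsyncJob.CMD_COLLECT_STATS: (JBoxd.collect_stats, lambda data: ())
        }
        while True:
            self.log_debug("JBox daemon waiting for commands...")
            cmd, data = self.queue.recv()
            if cmd not in dispatch:
                self.log_error("Unknown command " + str(cmd))
                continue
            fn, make_args = dispatch[cmd]
            JBoxd.schedule_thread(cmd, fn, make_args(data))
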
Example #7
    def run(self):
        if VolMgr.has_update_for_user_home_image():
            VolMgr.update_user_home_image(fetch=False)

        while True:
            self.log_debug("JBox daemon waiting for commands...")
            cmd, data = self.queue.recv()

            if cmd == JBoxAsyncJob.CMD_BACKUP_CLEANUP:
                args = (data, )
                fn = JBoxd.backup_and_cleanup
            elif cmd == JBoxAsyncJob.CMD_LAUNCH_SESSION:
                args = (data[0], data[1], data[2])
                fn = JBoxd.launch_session
            elif cmd == JBoxAsyncJob.CMD_AUTO_ACTIVATE:
                args = ()
                fn = JBoxd.auto_activate
            elif cmd == JBoxAsyncJob.CMD_UPDATE_USER_HOME_IMAGE:
                args = ()
                fn = JBoxd.update_user_home_image
            elif cmd == JBoxAsyncJob.CMD_REFRESH_DISKS:
                args = ()
                fn = JBoxd.refresh_disks
            elif cmd == JBoxAsyncJob.CMD_COLLECT_STATS:
                args = ()
                fn = JBoxd.collect_stats
            else:
                self.log_error("Unknown command " + str(cmd))
                continue

            JBoxd.schedule_thread(cmd, fn, args)
Example #8
    def __init__(self):
        dckr = docker.Client()
        cfg = read_config()
        cloud_cfg = cfg['cloud_host']
        user_activation_cfg = cfg['user_activation']

        LoggerMixin.setup_logger(level=cfg['root_log_level'])
        LoggerMixin.DEFAULT_LEVEL = cfg['jbox_log_level']

        db.configure_db(cfg)

        CloudHost.configure(has_s3=cloud_cfg['s3'],
                            has_dynamodb=cloud_cfg['dynamodb'],
                            has_cloudwatch=cloud_cfg['cloudwatch'],
                            has_autoscale=cloud_cfg['autoscale'],
                            has_route53=cloud_cfg['route53'],
                            has_ebs=cloud_cfg['ebs'],
                            has_ses=cloud_cfg['ses'],
                            scale_up_at_load=cloud_cfg['scale_up_at_load'],
                            scale_up_policy=cloud_cfg['scale_up_policy'],
                            autoscale_group=cloud_cfg['autoscale_group'],
                            route53_domain=cloud_cfg['route53_domain'],
                            region=cloud_cfg['region'],
                            install_id=cloud_cfg['install_id'])
        VolMgr.configure(dckr, cfg)
        JBoxContainer.configure(dckr, cfg['docker_image'], cfg['mem_limit'], cfg['cpu_limit'],
                                cfg['numlocalmax'], cfg['async_job_port'], async_mode=JBoxAsyncJob.MODE_SUB)
        self.log_debug("Backup daemon listening on port: " + str(cfg['async_job_port']))
        self.queue = JBoxContainer.ASYNC_JOB

        JBoxd.MAX_ACTIVATIONS_PER_SEC = user_activation_cfg['max_activations_per_sec']
        JBoxd.MAX_AUTO_ACTIVATIONS_PER_RUN = user_activation_cfg['max_activations_per_run']
        JBoxd.ACTIVATION_SUBJECT = user_activation_cfg['mail_subject']
        JBoxd.ACTIVATION_BODY = user_activation_cfg['mail_body']
        JBoxd.ACTIVATION_SENDER = user_activation_cfg['sender']
Example #9
 def launch_session(name, email, reuse=True):
     try:
         JBoxd._wait_for_session_backup(name)
         VolMgr.refresh_disk_use_status()
         JBoxContainer.launch_by_name(name, email, reuse=reuse)
     finally:
         JBoxd.finish_thread()
Example #10
 def launch_session(name, email, reuse=True):
     try:
         JBoxd._wait_for_session_backup(name)
         VolMgr.refresh_disk_use_status()
         JBoxContainer.launch_by_name(name, email, reuse=reuse)
     finally:
         JBoxd.finish_thread()
Example #11
    def run(self):
        Compute.deregister_instance_dns()
        Compute.register_instance_dns()
        JBoxd.publish_perf_counters()

        JBoxd.log_debug("Setting up signal handlers")
        signal.signal(signal.SIGINT, JBoxd.signal_handler)
        signal.signal(signal.SIGTERM, JBoxd.signal_handler)

        if VolMgr.has_update_for_user_home_image():
            VolMgr.update_user_home_image(fetch=False)

        while True:
            self.log_debug("JBox daemon waiting for commands...")
            try:
                offline, reply_req = JBoxd.QUEUE.poll(self._is_scheduled(JBoxAsyncJob.CMD_REQ_RESP, ()))
            except ValueError:
                self.log_exception("Exception reading command. Will retry after 10 seconds")
                time.sleep(10)
                continue

            if offline:
                try:
                    self.process_offline()
                except:
                    self.log_exception("Exception scheduling request")

            if reply_req:
                JBoxd.schedule_thread(JBoxAsyncJob.CMD_REQ_RESP, JBoxd.process_and_respond, ())
Example #12
    def maintain(max_timeout=0, inactive_timeout=0, protected_names=()):
        JBoxContainer.log_info("Starting container maintenance...")
        tnow = datetime.datetime.now(pytz.utc)
        tmin = datetime.datetime(datetime.MINYEAR, 1, 1, tzinfo=pytz.utc)

        stop_before = (tnow - datetime.timedelta(seconds=max_timeout)) if (max_timeout > 0) else tmin
        stop_inactive_before = (tnow - datetime.timedelta(seconds=inactive_timeout)) if (inactive_timeout > 0) else tmin

        all_containers = JBoxContainer.DCKR.containers(all=True)
        all_cnames = {}
        container_id_list = []
        for cdesc in all_containers:
            cid = cdesc['Id']
            cont = JBoxContainer(cid)
            container_id_list.append(cid)
            cname = cont.get_name()
            all_cnames[cname] = cid

            if (cname is None) or (cname in protected_names):
                JBoxContainer.log_debug("Ignoring %s", cont.debug_str())
                continue

            c_is_active = cont.is_running() or cont.is_restarting()
            last_ping = JBoxContainer._get_last_ping(cname)

            # if we don't have a ping record, create one (we must have restarted) 
            if (last_ping is None) and c_is_active:
                JBoxContainer.log_info("Discovered new container %s", cont.debug_str())
                JBoxContainer.record_ping(cname)

            start_time = cont.time_started()
            # check that start time is not absurdly small (indicates a container that's starting up)
            start_time_not_zero = (tnow-start_time).total_seconds() < (365*24*60*60)
            if (start_time < stop_before) and start_time_not_zero:
                # don't allow running beyond the limit for long running sessions
                # JBoxContainer.log_info("time_started " + str(cont.time_started()) +
                #               " delete_before: " + str(delete_before) +
                #               " cond: " + str(cont.time_started() < delete_before))
                JBoxContainer.log_info("Running beyond allowed time %s", cont.debug_str())
                cont.async_backup_and_cleanup()
            elif (last_ping is not None) and c_is_active and (last_ping < stop_inactive_before):
                # if inactive for too long, stop it
                # JBoxContainer.log_info("last_ping " + str(last_ping) + " stop_before: " + str(stop_before) +
                #           " cond: " + str(last_ping < stop_before))
                JBoxContainer.log_info("Inactive beyond allowed time %s", cont.debug_str())
                cont.async_backup_and_cleanup()

        # delete ping entries for non-existent containers
        for cname in JBoxContainer.PINGS.keys():
            if cname not in all_cnames:
                del JBoxContainer.PINGS[cname]

        JBoxContainer.VALID_CONTAINERS = all_cnames
        JBoxContainer.publish_container_stats()
        VolMgr.refresh_disk_use_status(container_id_list=container_id_list)
        JBoxContainer.log_info("Finished container maintenance.")
Example #13
    def __init__(self):
        dckr = docker.Client()
        cfg = JBox.cfg = read_config()
        cloud_cfg = cfg['cloud_host']

        LoggerMixin.setup_logger(level=cfg['root_log_level'])
        LoggerMixin.DEFAULT_LEVEL = cfg['jbox_log_level']

        JBoxHandler.configure(cfg)
        db.configure_db(cfg)

        CloudHost.configure(has_s3=cloud_cfg['s3'],
                            has_dynamodb=cloud_cfg['dynamodb'],
                            has_cloudwatch=cloud_cfg['cloudwatch'],
                            has_autoscale=cloud_cfg['autoscale'],
                            has_route53=cloud_cfg['route53'],
                            has_ebs=cloud_cfg['ebs'],
                            has_ses=cloud_cfg['ses'],
                            scale_up_at_load=cloud_cfg['scale_up_at_load'],
                            scale_up_policy=cloud_cfg['scale_up_policy'],
                            autoscale_group=cloud_cfg['autoscale_group'],
                            route53_domain=cloud_cfg['route53_domain'],
                            region=cloud_cfg['region'],
                            install_id=cloud_cfg['install_id'])

        VolMgr.configure(dckr, cfg)
        JBoxAsyncJob.configure(cfg)
        JBoxContainer.configure(dckr, cfg['docker_image'], cfg['mem_limit'],
                                cfg['cpu_limit'], cfg['numlocalmax'],
                                cfg['async_job_ports'])

        self.application = tornado.web.Application([
            (r"/", MainHandler), (r"/hostlaunchipnb/", AuthHandler),
            (r"/hostadmin/", AdminHandler), (r"/ping/", PingHandler),
            (r"/cors/", CorsHandler)
        ])
        cookie_secret = ''.join(
            random.choice(string.ascii_uppercase + string.digits)
            for x in xrange(32))
        self.application.settings["cookie_secret"] = cookie_secret
        self.application.settings["google_oauth"] = cfg["google_oauth"]
        self.application.listen(cfg["port"])

        self.ioloop = tornado.ioloop.IOLoop.instance()

        # run container maintenance every 5 minutes
        run_interval = 5 * 60 * 1000
        self.log_info("Container maintenance every " +
                      str(run_interval / (60 * 1000)) + " minutes")
        self.ct = tornado.ioloop.PeriodicCallback(JBox.do_housekeeping,
                                                  run_interval, self.ioloop)
Example #14
    def __init__(self):
        dckr = docker.Client()
        cfg = JBox.cfg = read_config()
        cloud_cfg = cfg['cloud_host']

        LoggerMixin.setup_logger(level=cfg['root_log_level'])
        LoggerMixin.DEFAULT_LEVEL = cfg['jbox_log_level']

        JBoxHandler.configure(cfg)
        db.configure_db(cfg)

        CloudHost.configure(has_s3=cloud_cfg['s3'],
                            has_dynamodb=cloud_cfg['dynamodb'],
                            has_cloudwatch=cloud_cfg['cloudwatch'],
                            has_autoscale=cloud_cfg['autoscale'],
                            has_route53=cloud_cfg['route53'],
                            has_ebs=cloud_cfg['ebs'],
                            has_ses=cloud_cfg['ses'],
                            scale_up_at_load=cloud_cfg['scale_up_at_load'],
                            scale_up_policy=cloud_cfg['scale_up_policy'],
                            autoscale_group=cloud_cfg['autoscale_group'],
                            route53_domain=cloud_cfg['route53_domain'],
                            region=cloud_cfg['region'],
                            install_id=cloud_cfg['install_id'])

        VolMgr.configure(dckr, cfg)
        JBoxAsyncJob.configure(cfg)
        JBoxContainer.configure(dckr, cfg['docker_image'], cfg['mem_limit'], cfg['cpu_limit'],
                                cfg['numlocalmax'], cfg['async_job_ports'])

        self.application = tornado.web.Application([
            (r"/", MainHandler),
            (r"/hostlaunchipnb/", AuthHandler),
            (r"/hostadmin/", AdminHandler),
            (r"/ping/", PingHandler),
            (r"/cors/", CorsHandler),
            (r"/hw/", HomeworkHandler)
        ])
        cookie_secret = ''.join(random.choice(string.ascii_uppercase + string.digits) for x in xrange(32))
        self.application.settings["cookie_secret"] = cookie_secret
        self.application.settings["google_oauth"] = cfg["google_oauth"]
        self.application.listen(cfg["port"])

        self.ioloop = tornado.ioloop.IOLoop.instance()

        # run container maintenance every 5 minutes
        run_interval = 5 * 60 * 1000
        self.log_info("Container maintenance every " + str(run_interval / (60 * 1000)) + " minutes")
        self.ct = tornado.ioloop.PeriodicCallback(JBox.do_housekeeping, run_interval, self.ioloop)
Example #15
    def start(self, email):
        self.refresh()
        JBoxContainer.log_info("Starting %s", self.debug_str())
        if self.is_running() or self.is_restarting():
            JBoxContainer.log_warn(
                "Already started %s. Browser connectivity issues?",
                self.debug_str())
            return

        disk = VolMgr.get_disk_for_user(email)
        vols = {
            disk.disk_path: {
                'bind': JBoxContainer.VOLUMES[0],
                'ro': False
            }
        }

        JBoxContainer.DCKR.start(
            self.dockid,
            port_bindings=JBoxContainer.CONTAINER_PORT_BINDINGS,
            binds=vols)
        self.refresh()
        JBoxContainer.log_info("Started %s", self.debug_str())
        cname = self.get_name()
        if cname is not None:
            JBoxContainer.record_ping(cname)
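
The vols dictionary uses the older docker-py binds convention: a host path mapped to a bind target inside the container plus a read-only flag. For a user disk it resolves to something like the following (both paths are placeholders, not JuliaBox's actual mount points):

# Illustrative only; the real values come from VolMgr and JBoxContainer.VOLUMES.
vols = {
    '/mnt/jbox/disks/7': {      # host-side disk path (placeholder)
        'bind': '/home/juser',  # mount point inside the container (placeholder)
        'ro': False             # writable for the session user
    }
}
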
Example #16
    def publish_perf_counters():
        """ Publish performance counters. Used for status monitoring and auto scaling. """
        VolMgr.refresh_disk_use_status()

        nactive = BaseContainer.num_active(BaseContainer.SFX_INT)
        stats = []
        stats.append(("NumActiveContainers", "Count", nactive))

        nactive_api = BaseContainer.num_active(BaseContainer.SFX_API)
        stats.append(("NumActiveAPIContainers", "Count", nactive_api))

        curr_cpu_used_pct = psutil.cpu_percent()
        last_cpu_used_pct = curr_cpu_used_pct if BaseContainer.LAST_CPU_PCT is None else BaseContainer.LAST_CPU_PCT
        BaseContainer.LAST_CPU_PCT = curr_cpu_used_pct
        cpu_used_pct = int((curr_cpu_used_pct + last_cpu_used_pct) / 2)
        stats.append(("CPUUsed", "Percent", cpu_used_pct))

        mem_used_pct = psutil.virtual_memory().percent
        stats.append(("MemUsed", "Percent", mem_used_pct))

        disk_used_pct = 0
        for x in psutil.disk_partitions():
            if not VolMgr.is_mount_path(x.mountpoint):
                try:
                    disk_used_pct = max(psutil.disk_usage(x.mountpoint).percent, disk_used_pct)
                except:
                    pass
        if BaseContainer.INITIAL_DISK_USED_PCT is None:
            BaseContainer.INITIAL_DISK_USED_PCT = disk_used_pct
        disk_used_pct = max(0, (disk_used_pct - BaseContainer.INITIAL_DISK_USED_PCT))
        stats.append(("DiskUsed", "Percent", disk_used_pct))

        cont_load_pct = min(100, max(0, nactive * 100 / SessContainer.MAX_CONTAINERS))
        stats.append(("ContainersUsed", "Percent", cont_load_pct))

        api_cont_load_pct = min(100, max(0, nactive_api * 100 / APIContainer.MAX_CONTAINERS))
        stats.append(("APIContainersUsed", "Percent", api_cont_load_pct))

        stats.append(("DiskIdsUsed", "Percent", VolMgr.used_pct()))

        overall_load_pct = max(
            cont_load_pct, api_cont_load_pct, disk_used_pct, mem_used_pct, cpu_used_pct, VolMgr.used_pct()
        )
        stats.append(("Load", "Percent", overall_load_pct))
        Compute.publish_stats_multi(stats)
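
The CPU figure above is a two-sample moving average across consecutive publishing runs, which damps momentary spikes before they feed the overall Load metric used for autoscaling. Isolated as a hypothetical helper (not part of the snippet):

    @staticmethod
    def smoothed_cpu_pct():
        # Average the current reading with the one recorded on the previous run.
        curr = psutil.cpu_percent()
        last = curr if BaseContainer.LAST_CPU_PCT is None else BaseContainer.LAST_CPU_PCT
        BaseContainer.LAST_CPU_PCT = curr
        return int((curr + last) / 2)
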
Example #17
    def __init__(self):
        dckr = docker.Client()
        cfg = read_config()
        cloud_cfg = cfg['cloud_host']
        user_activation_cfg = cfg['user_activation']

        LoggerMixin.setup_logger(level=cfg['root_log_level'])
        LoggerMixin.DEFAULT_LEVEL = cfg['jbox_log_level']

        db.configure_db(cfg)

        CloudHost.configure(has_s3=cloud_cfg['s3'],
                            has_dynamodb=cloud_cfg['dynamodb'],
                            has_cloudwatch=cloud_cfg['cloudwatch'],
                            has_autoscale=cloud_cfg['autoscale'],
                            has_route53=cloud_cfg['route53'],
                            has_ebs=cloud_cfg['ebs'],
                            has_ses=cloud_cfg['ses'],
                            scale_up_at_load=cloud_cfg['scale_up_at_load'],
                            scale_up_policy=cloud_cfg['scale_up_policy'],
                            autoscale_group=cloud_cfg['autoscale_group'],
                            route53_domain=cloud_cfg['route53_domain'],
                            region=cloud_cfg['region'],
                            install_id=cloud_cfg['install_id'])
        VolMgr.configure(dckr, cfg)
        JBoxAsyncJob.configure(cfg)
        JBoxContainer.configure(dckr,
                                cfg['docker_image'],
                                cfg['mem_limit'],
                                cfg['cpu_limit'],
                                cfg['numlocalmax'],
                                cfg['async_job_ports'],
                                async_mode=JBoxAsyncJob.MODE_SUB)
        self.log_debug("Backup daemon listening on ports: %s",
                       repr(cfg['async_job_ports']))
        JBoxd.QUEUE = JBoxContainer.ASYNC_JOB

        JBoxd.MAX_ACTIVATIONS_PER_SEC = user_activation_cfg[
            'max_activations_per_sec']
        JBoxd.MAX_AUTO_ACTIVATIONS_PER_RUN = user_activation_cfg[
            'max_activations_per_run']
        JBoxd.ACTIVATION_SUBJECT = user_activation_cfg['mail_subject']
        JBoxd.ACTIVATION_BODY = user_activation_cfg['mail_body']
        JBoxd.ACTIVATION_SENDER = user_activation_cfg['sender']
Example #18
    def publish_perf_counters():
        """ Publish performance counters. Used for status monitoring and auto scaling. """
        VolMgr.refresh_disk_use_status()
        
        nactive = BaseContainer.num_active(BaseContainer.SFX_INT)
        stats = []
        stats.append(("NumActiveContainers", "Count", nactive))

        nactive_api = BaseContainer.num_active(BaseContainer.SFX_API)
        stats.append(("NumActiveAPIContainers", "Count", nactive_api))

        curr_cpu_used_pct = psutil.cpu_percent()
        last_cpu_used_pct = curr_cpu_used_pct if BaseContainer.LAST_CPU_PCT is None else BaseContainer.LAST_CPU_PCT
        BaseContainer.LAST_CPU_PCT = curr_cpu_used_pct
        cpu_used_pct = int((curr_cpu_used_pct + last_cpu_used_pct)/2)
        stats.append(("CPUUsed", "Percent", cpu_used_pct))

        mem_used_pct = psutil.virtual_memory().percent
        stats.append(("MemUsed", "Percent", mem_used_pct))

        disk_used_pct = 0
        for x in psutil.disk_partitions():
            if not VolMgr.is_mount_path(x.mountpoint):
                try:
                    disk_used_pct = max(psutil.disk_usage(x.mountpoint).percent, disk_used_pct)
                except:
                    pass
        if BaseContainer.INITIAL_DISK_USED_PCT is None:
            BaseContainer.INITIAL_DISK_USED_PCT = disk_used_pct
        disk_used_pct = max(0, (disk_used_pct - BaseContainer.INITIAL_DISK_USED_PCT))
        stats.append(("DiskUsed", "Percent", disk_used_pct))

        cont_load_pct = min(100, max(0, nactive * 100 / SessContainer.MAX_CONTAINERS))
        stats.append(("ContainersUsed", "Percent", cont_load_pct))

        api_cont_load_pct = min(100, max(0, nactive_api * 100 / APIContainer.MAX_CONTAINERS))
        stats.append(("APIContainersUsed", "Percent", api_cont_load_pct))

        stats.append(("DiskIdsUsed", "Percent", VolMgr.used_pct()))

        overall_load_pct = max(cont_load_pct, api_cont_load_pct, disk_used_pct, mem_used_pct, cpu_used_pct, VolMgr.used_pct())
        stats.append(("Load", "Percent", overall_load_pct))
        Compute.publish_stats_multi(stats)
Example #19
    def __init__(self):
        LoggerMixin.configure()
        db.configure()
        Compute.configure()
        SessContainer.configure()
        APIContainer.configure()
        VolMgr.configure()

        JBoxAsyncJob.configure()
        JBoxAsyncJob.init(JBoxAsyncJob.MODE_SUB)

        self.log_debug("Container manager listening on ports: %s", repr(JBoxCfg.get('container_manager_ports')))
        JBoxd.QUEUE = JBoxAsyncJob.get()

        JBoxd.MAX_ACTIVATIONS_PER_SEC = JBoxCfg.get('user_activation.max_activations_per_sec')
        JBoxd.MAX_AUTO_ACTIVATIONS_PER_RUN = JBoxCfg.get('user_activation.max_activations_per_run')
        JBoxd.ACTIVATION_SUBJECT = JBoxCfg.get('user_activation.mail_subject')
        JBoxd.ACTIVATION_BODY = JBoxCfg.get('user_activation.mail_body')
        JBoxd.ACTIVATION_SENDER = JBoxCfg.get('user_activation.sender')
Example #20
    def __init__(self):
        LoggerMixin.configure()
        db.configure()
        Compute.configure()
        SessContainer.configure()
        VolMgr.configure()

        JBoxAsyncJob.configure()
        JBoxAsyncJob.init(JBoxAsyncJob.MODE_PUB)

        self.application = tornado.web.Application(handlers=[
            (r"/", MainHandler),
            (r"/jboxadmin/", AdminHandler),
            (r"/jboxping/", PingHandler),
            (r"/jboxcors/", CorsHandler)
        ])
        JBPluginHandler.add_plugin_handlers(self.application)
        JBPluginUI.create_include_files()

        # cookie_secret = ''.join(random.choice(string.ascii_uppercase + string.digits) for x in xrange(32))
        # use sesskey as cookie secret to be able to span multiple tornado servers
        self.application.settings["cookie_secret"] = JBoxCfg.get('sesskey')
        self.application.settings[
            "plugin_features"] = JBox.get_pluggedin_features()
        self.application.listen(JBoxCfg.get('interactive.manager_port'),
                                address=socket.gethostname())
        self.application.listen(JBoxCfg.get('interactive.manager_port'),
                                address='localhost')

        self.ioloop = tornado.ioloop.IOLoop.instance()

        # run container maintenance every 5 minutes
        run_interval = 5 * 60 * 1000
        self.log_info("Container maintenance every " +
                      str(run_interval / (60 * 1000)) + " minutes")
        self.ct = tornado.ioloop.PeriodicCallback(JBox.do_housekeeping,
                                                  run_interval, self.ioloop)
        self.sigct = tornado.ioloop.PeriodicCallback(JBox.do_signals, 1000,
                                                     self.ioloop)

        # or configure cacerts
        AsyncHTTPClient.configure(None, defaults=dict(validate_cert=None))
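
The last line adjusts the certificate-validation defaults for outbound async HTTP calls, and the in-code comment points at the alternative of supplying a CA bundle instead. A sketch of that alternative (the path is a placeholder, not the project's configuration):

        # Sketch: keep validation on and point the client at a CA bundle.
        AsyncHTTPClient.configure(None, defaults=dict(
            validate_cert=True,
            ca_certs='/etc/ssl/certs/ca-certificates.crt'))
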
Example #21
    def run(self):
        if VolMgr.has_update_for_user_home_image():
            VolMgr.update_user_home_image(fetch=False)

        while True:
            self.log_debug("JBox daemon waiting for commands...")
            try:
                offline, reply_req = JBoxd.QUEUE.poll(
                    self._is_scheduled(JBoxAsyncJob.CMD_REQ_RESP, ()))
            except ValueError:
                self.log_exception(
                    "Exception reading command. Will retry after 10 seconds")
                time.sleep(10)
                continue

            if offline:
                try:
                    self.process_offline()
                except:
                    self.log_exception("Exception scheduling request")

            if reply_req:
                JBoxd.schedule_thread(JBoxAsyncJob.CMD_REQ_RESP,
                                      JBoxd.process_and_respond, ())
Example #22
    def delete(self, backup=False):
        JBoxContainer.log_info("Deleting %s", self.debug_str())
        self.refresh()
        cname = self.get_name()
        if self.is_running() or self.is_restarting():
            self.kill()

        disk = VolMgr.get_disk_from_container(self.dockid)
        if disk is not None:
            disk.release(backup=backup)

        if cname is not None:
            JBoxContainer.PINGS.pop(cname, None)
        JBoxContainer.DCKR.remove_container(self.dockid)
        JBoxContainer.log_info("Deleted %s", self.debug_str())
Example #23
    def init():
        dckr = docker.Client()
        cfg = read_config()
        cloud_cfg = cfg['cloud_host']
        cloud_cfg['backup_bucket'] = "juliabox_userbackup"

        LoggerMixin.setup_logger(level=cfg['root_log_level'])
        LoggerMixin.DEFAULT_LEVEL = cfg['jbox_log_level']

        CloudHost.configure(has_s3=True, #cloud_cfg['s3'],
                        has_dynamodb=cloud_cfg['dynamodb'],
                        has_cloudwatch=cloud_cfg['cloudwatch'],
                        has_autoscale=cloud_cfg['autoscale'],
                        has_route53=cloud_cfg['route53'],
                        has_ebs=cloud_cfg['ebs'],
                        has_ses=cloud_cfg['ses'],
                        scale_up_at_load=cloud_cfg['scale_up_at_load'],
                        scale_up_policy=cloud_cfg['scale_up_policy'],
                        autoscale_group=cloud_cfg['autoscale_group'],
                        route53_domain=cloud_cfg['route53_domain'],
                        region=cloud_cfg['region'],
                        install_id=cloud_cfg['install_id'])

        VolMgr.configure(dckr, cfg)
Example #24
    def publish_container_stats():
        """ Publish custom cloudwatch statistics. Used for status monitoring and auto scaling. """
        nactive = JBoxContainer.num_active()
        CloudHost.publish_stats("NumActiveContainers", "Count", nactive)

        curr_cpu_used_pct = psutil.cpu_percent()
        last_cpu_used_pct = curr_cpu_used_pct if JBoxContainer.LAST_CPU_PCT is None else JBoxContainer.LAST_CPU_PCT
        JBoxContainer.LAST_CPU_PCT = curr_cpu_used_pct
        cpu_used_pct = int((curr_cpu_used_pct + last_cpu_used_pct) / 2)

        mem_used_pct = psutil.virtual_memory().percent
        CloudHost.publish_stats("MemUsed", "Percent", mem_used_pct)

        disk_used_pct = 0
        for x in psutil.disk_partitions():
            if not VolMgr.is_mount_path(x.mountpoint):
                try:
                    disk_used_pct = max(
                        psutil.disk_usage(x.mountpoint).percent, disk_used_pct)
                except:
                    pass
        if JBoxContainer.INITIAL_DISK_USED_PCT is None:
            JBoxContainer.INITIAL_DISK_USED_PCT = disk_used_pct
        disk_used_pct = max(
            0, (disk_used_pct - JBoxContainer.INITIAL_DISK_USED_PCT))
        CloudHost.publish_stats("DiskUsed", "Percent", disk_used_pct)

        cont_load_pct = min(
            100, max(0, nactive * 100 / JBoxContainer.MAX_CONTAINERS))
        CloudHost.publish_stats("ContainersUsed", "Percent", cont_load_pct)

        CloudHost.publish_stats("DiskIdsUsed", "Percent", VolMgr.used_pct())

        overall_load_pct = max(cont_load_pct, disk_used_pct, mem_used_pct,
                               cpu_used_pct, VolMgr.used_pct())
        CloudHost.publish_stats("Load", "Percent", overall_load_pct)
Example #25
    def delete(self, backup=False):
        JBoxContainer.log_info("Deleting %s", self.debug_str())
        self.refresh()
        cname = self.get_name()
        if self.is_running() or self.is_restarting():
            self.kill()

        disk = VolMgr.get_disk_from_container(self.dockid)
        if disk is not None:
            disk.release(backup=backup)

        if cname is not None:
            JBoxContainer.PINGS.pop(cname, None)
        JBoxContainer.DCKR.remove_container(self.dockid)
        JBoxContainer.log_info("Deleted %s", self.debug_str())
Example #26
    def start(self, email):
        self.refresh()
        JBoxContainer.log_info("Starting %s", self.debug_str())
        if self.is_running() or self.is_restarting():
            JBoxContainer.log_info("Already started %s", self.debug_str())
            return

        disk = VolMgr.get_disk_for_user(email)
        vols = {
            disk.disk_path: {
                'bind': JBoxContainer.VOLUMES[0],
                'ro': False
            }
        }

        JBoxContainer.DCKR.start(self.dockid, port_bindings=JBoxContainer.CONTAINER_PORT_BINDINGS, binds=vols)
        self.refresh()
        JBoxContainer.log_info("Started %s", self.debug_str())
        cname = self.get_name()
        if cname is not None:
            JBoxContainer.record_ping(cname)
Example #27
 def get_disk_allocated(self):
     disk = VolMgr.get_disk_from_container(self.dockid)
     if disk is not None:
         return disk.get_disk_allocated_size()
     return 0
Example #28
 def refresh_disks():
     if JBoxd._is_scheduled(JBoxAsyncJob.CMD_UPDATE_USER_HOME_IMAGE, ()):
         return
     VolMgr.refresh_user_home_image()
Example #29
 def publish_container_stats():
     VolMgr.publish_stats()
     db.publish_stats()
     JBoxDynConfig.set_stat_collected_date(Compute.get_install_id())
Example #30
 def launch_session(name, email, reuse=True):
     JBoxd._wait_for_session_backup(name)
     VolMgr.refresh_disk_use_status()
     SessContainer.launch_by_name(name, email, reuse=reuse)
     JBoxd.publish_perf_counters()
Example #31
 def update_user_home_image():
     VolMgr.update_user_home_image(fetch=True)
     VolMgr.refresh_user_home_image()
Example #32
 def launch_session(name, email, reuse=True):
     JBoxd.publish_anticipated_load(name)
     JBoxd._wait_for_session_backup(name)
     VolMgr.refresh_disk_use_status()
     JBoxd._launch_session(name, email, reuse)
Example #33
 def get_disk_allocated(self):
     disk = VolMgr.get_disk_from_container(self.dockid)
     if disk is not None:
         return disk.get_disk_allocated_size()
     return 0
Example #34
 def update_user_home_image():
     try:
         VolMgr.update_user_home_image(fetch=True)
         JBoxLoopbackVol.refresh_all_disks()
     finally:
         JBoxd.finish_thread()
Example #35
 def launch_session(name, email, reuse=True):
     JBoxd.publish_anticipated_load(name)
     JBoxd._wait_for_session_backup(name)
     VolMgr.refresh_disk_use_status()
     JBoxd._launch_session(name, email, reuse)
Example #36
    def maintain(max_timeout=0, inactive_timeout=0, protected_names=()):
        JBoxContainer.log_info("Starting container maintenance...")
        tnow = datetime.datetime.now(pytz.utc)
        tmin = datetime.datetime(datetime.MINYEAR, 1, 1, tzinfo=pytz.utc)

        stop_before = (tnow - datetime.timedelta(seconds=max_timeout)) if (
            max_timeout > 0) else tmin
        stop_inactive_before = (tnow - datetime.timedelta(
            seconds=inactive_timeout)) if (inactive_timeout > 0) else tmin

        all_containers = JBoxContainer.DCKR.containers(all=True)
        all_cnames = {}
        container_id_list = []
        for cdesc in all_containers:
            cid = cdesc['Id']
            cont = JBoxContainer(cid)
            container_id_list.append(cid)
            cname = cont.get_name()
            all_cnames[cname] = cid

            if (cname is None) or (cname in protected_names):
                JBoxContainer.log_debug("Ignoring %s", cont.debug_str())
                continue

            c_is_active = cont.is_running() or cont.is_restarting()
            last_ping = JBoxContainer._get_last_ping(cname)

            # if we don't have a ping record, create one (we must have restarted)
            if (last_ping is None) and c_is_active:
                JBoxContainer.log_info("Discovered new container %s",
                                       cont.debug_str())
                JBoxContainer.record_ping(cname)

            start_time = cont.time_started()
            # check that start time is not absurdly small (indicates a container that's starting up)
            start_time_not_zero = (tnow - start_time).total_seconds() < (
                365 * 24 * 60 * 60)
            if (start_time < stop_before) and start_time_not_zero:
                # don't allow running beyond the limit for long running sessions
                # JBoxContainer.log_info("time_started " + str(cont.time_started()) +
                #               " delete_before: " + str(delete_before) +
                #               " cond: " + str(cont.time_started() < delete_before))
                JBoxContainer.log_warn("Running beyond allowed time %s",
                                       cont.debug_str())
                cont.async_backup_and_cleanup()
            elif (last_ping is not None) and c_is_active and (
                    last_ping < stop_inactive_before):
                # if inactive for too long, stop it
                # JBoxContainer.log_info("last_ping " + str(last_ping) + " stop_before: " + str(stop_before) +
                #           " cond: " + str(last_ping < stop_before))
                JBoxContainer.log_warn("Inactive beyond allowed time %s",
                                       cont.debug_str())
                cont.async_backup_and_cleanup()

        # delete ping entries for non-existent containers
        for cname in JBoxContainer.PINGS.keys():
            if cname not in all_cnames:
                del JBoxContainer.PINGS[cname]

        JBoxContainer.VALID_CONTAINERS = all_cnames
        JBoxContainer.publish_container_stats()
        VolMgr.refresh_disk_use_status(container_id_list=container_id_list)
        JBoxContainer.log_info("Finished container maintenance.")
Example #37
 def update_user_home_image():
     try:
         VolMgr.update_user_home_image(fetch=True)
         JBoxLoopbackVol.refresh_all_disks()
     finally:
         JBoxd.finish_thread()
Example #38
 def collect_stats():
     VolMgr.publish_stats()
     db.publish_stats()
     JBoxDynConfig.set_stat_collected_date(CloudHost.INSTALL_ID)
Example #39
 def do_update_user_home_image():
     if VolMgr.has_update_for_user_home_image():
         if not VolMgr.update_user_home_image(fetch=False):
             JBoxContainer.async_update_user_home_image()
Example #40
 def update_user_home_image():
     VolMgr.update_user_home_image(fetch=True)
     JBoxLoopbackVol.refresh_all_disks()
Example #41
 def do_update_user_home_image():
     if VolMgr.has_update_for_user_home_image():
         if not VolMgr.update_user_home_image(fetch=False):
             JBoxContainer.async_update_user_home_image()
Example #42
    CloudHost.configure(has_s3=cloud_cfg['s3'],
                        has_dynamodb=cloud_cfg['dynamodb'],
                        has_cloudwatch=cloud_cfg['cloudwatch'],
                        has_autoscale=cloud_cfg['autoscale'],
                        has_route53=cloud_cfg['route53'],
                        has_ebs=cloud_cfg['ebs'],
                        has_ses=cloud_cfg['ses'],
                        scale_up_at_load=cloud_cfg['scale_up_at_load'],
                        scale_up_policy=cloud_cfg['scale_up_policy'],
                        autoscale_group=cloud_cfg['autoscale_group'],
                        route53_domain=cloud_cfg['route53_domain'],
                        region=cloud_cfg['region'],
                        install_id=cloud_cfg['install_id'])

    VolMgr.configure(dckr, cfg)
    ts = JBoxVol._get_user_home_timestamp()
    VolMgr.log_debug("user_home_timestamp: %s", ts.strftime("%Y%m%d_%H%M"))

    img_dir, img_file = os.path.split(JBoxVol.USER_HOME_IMG)
    new_img_file_name = 'user_home_' + ts.strftime("%Y%m%d_%H%M") + '.tar.gz'
    new_img_file = os.path.join(img_dir, new_img_file_name)
    shutil.copyfile(JBoxVol.USER_HOME_IMG, new_img_file)

    VolMgr.log_debug("new image file is at : %s", new_img_file)

    bucket = 'juliabox-user-home-templates'

    VolMgr.log_debug("pushing new image file to s3 at: %s", bucket)
    CloudHost.push_file_to_s3(bucket, new_img_file)
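
As a concrete illustration of the naming scheme above, an assumed user-home timestamp of 2015-03-01 09:30 UTC produces the following file name:

import datetime

ts = datetime.datetime(2015, 3, 1, 9, 30)  # assumed value, for illustration only
print('user_home_' + ts.strftime("%Y%m%d_%H%M") + '.tar.gz')  # user_home_20150301_0930.tar.gz
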