def maintain(max_timeout=0, inactive_timeout=0):
    """Run one maintenance pass over all session containers.

    For every container returned by ``BaseContainer.session_containers``:

    * containers without a name are logged and ignored;
    * an active (running or restarting) container with no ping record gets
      one recorded;
    * a container started more than ``max_timeout`` seconds ago, or an
      active container whose last ping is more than ``inactive_timeout``
      seconds old, is invalidated and scheduled for backup and cleanup.

    Afterwards ping records of containers that are no longer listed are
    removed, ``SessContainer.VALID_CONTAINERS`` is replaced by the fresh
    name -> id mapping and ``VolMgr`` is asked to refresh disk usage.

    :param max_timeout: maximum allowed run time in seconds. With a value
        <= 0 the cut-off is the earliest representable time, which
        effectively turns the check off.
    :param inactive_timeout: maximum allowed time since the last ping in
        seconds. A value <= 0 effectively turns the check off.
    """
    SessContainer.log_info("Starting container maintenance...")
    tnow = datetime.datetime.now(pytz.utc)
    tmin = datetime.datetime(datetime.MINYEAR, 1, 1, tzinfo=pytz.utc)

    stop_before = (tnow - datetime.timedelta(seconds=max_timeout)) if (max_timeout > 0) else tmin
    stop_inactive_before = (tnow - datetime.timedelta(seconds=inactive_timeout)) if (inactive_timeout > 0) else tmin

    all_containers = BaseContainer.session_containers(allcontainers=True)

    all_cnames = {}
    container_id_list = []
    for cdesc in all_containers:
        cid = cdesc['Id']
        cont = SessContainer(cid)
        container_id_list.append(cid)
        cname = cont.get_name()

        if cname is None:
            SessContainer.log_debug("Ignoring %s", cont.debug_str())
            continue

        all_cnames[cname] = cid

        c_is_active = cont.is_running() or cont.is_restarting()
        last_ping = SessContainer._get_last_ping(cname)

        # if we don't have a ping record, create one (we must have restarted)
        if (last_ping is None) and c_is_active:
            SessContainer.log_info("Discovered new container %s", cont.debug_str())
            SessContainer.record_ping(cname)

        start_time = cont.time_started()
        # A start time more than a year in the past is treated as bogus
        # (indicates a container that is still starting up).
        start_time_not_zero = (tnow - start_time).total_seconds() < (365 * 24 * 60 * 60)

        if (start_time < stop_before) and start_time_not_zero:
            # don't allow running beyond the limit for long running sessions
            SessContainer.log_warn("Running beyond allowed time %s. Scheduling cleanup.", cont.debug_str())
            SessContainer.invalidate_container(cont.get_name())
            JBoxAsyncJob.async_backup_and_cleanup(cont.dockid)
        elif (last_ping is not None) and c_is_active and (last_ping < stop_inactive_before):
            # if inactive for too long, stop it
            SessContainer.log_warn("Inactive beyond allowed time %s. Scheduling cleanup.", cont.debug_str())
            SessContainer.invalidate_container(cont.get_name())
            JBoxAsyncJob.async_backup_and_cleanup(cont.dockid)

    # Delete ping entries for non-existent containers.  Iterate over a
    # snapshot of the keys because entries are removed inside the loop
    # (mutating a dict while iterating its key view fails on Python 3).
    for cname in list(SessContainer.PINGS.keys()):
        if cname not in all_cnames:
            del SessContainer.PINGS[cname]

    SessContainer.VALID_CONTAINERS = all_cnames
    VolMgr.refresh_disk_use_status(container_id_list=container_id_list)
    SessContainer.log_info("Finished container maintenance.")
def refresh_container_list():
    """Reconcile the tracked API containers with the containers that exist.

    Walks every container returned by ``BaseContainer.api_containers``:

    * containers without an API name are skipped;
    * containers that are neither running nor restarting are deleted;
    * active containers are registered under their API name;
    * an active container whose last ping is older than
      ``APIContainer.EXPIRE_SECS`` seconds is killed and deleted.

    Finally, entries in ``APIContainer.PINGS`` and
    ``APIContainer.API_CONTAINERS`` that do not belong to a surviving
    container are removed, and API names left without containers are
    dropped.
    """
    all_cnames = dict()

    tnow = datetime.datetime.now(pytz.utc)
    tmin = datetime.datetime(datetime.MINYEAR, 1, 1, tzinfo=pytz.utc)
    exp = APIContainer.EXPIRE_SECS
    # With a non-positive expiry the cut-off is the earliest representable
    # time, so no ping is ever considered too old.
    stop_responded_before = (tnow - datetime.timedelta(seconds=exp)) if (exp > 0) else tmin

    for c in BaseContainer.api_containers(allcontainers=True):
        cid = c['Id']
        cont = APIContainer(cid)
        cname = cont.get_name()
        api_name = cont.get_api_name()
        APIContainer.log_debug("examining container %s (%s), api:%r", cid, cname, api_name)
        if api_name is None:
            continue

        c_is_active = cont.is_running() or cont.is_restarting()
        if not c_is_active:
            cont.delete()
            continue

        APIContainer.register_api_container(api_name, cname)
        last_ping = APIContainer._get_last_ping(cname)
        if (last_ping is not None) and (last_ping < stop_responded_before):
            APIContainer.log_warn("Terminating possibly unresponsive container %s.", cont.debug_str())
            cont.kill()
            cont.delete()
            # The container is gone: do not record it as existing, so that
            # its ping record and registration are purged below.
            continue

        all_cnames[cname] = cid

    # Delete ping entries for non-existent containers.  Iterate over a
    # snapshot of the keys because entries are removed inside the loop.
    for cname in list(APIContainer.PINGS.keys()):
        if cname not in all_cnames:
            del APIContainer.PINGS[cname]

    # Delete non-existent containers from the container lists.  Each list is
    # filtered in place; emptied API names are collected first and removed
    # afterwards so the dict is not resized while it is being iterated.
    dellist = []
    for (api_name, clist) in APIContainer.API_CONTAINERS.items():
        clist[:] = [x for x in clist if x in all_cnames]
        if not clist:
            dellist.append(api_name)

    for api_name in dellist:
        del APIContainer.API_CONTAINERS[api_name]
def refresh_container_list():
    """Synchronise API container bookkeeping with the existing containers.

    Each container listed by ``BaseContainer.api_containers`` is examined.
    Containers with no API name are ignored and containers that are neither
    running nor restarting are deleted.  Every active container is
    registered under its API name; if its last recorded ping is older than
    ``APIContainer.EXPIRE_SECS`` seconds it is then killed and deleted.

    After the scan, ``APIContainer.PINGS`` and
    ``APIContainer.API_CONTAINERS`` are pruned of every container name that
    did not survive, and API names with no containers left are removed.
    """
    all_cnames = dict()

    tnow = datetime.datetime.now(pytz.utc)
    tmin = datetime.datetime(datetime.MINYEAR, 1, 1, tzinfo=pytz.utc)
    exp = APIContainer.EXPIRE_SECS
    if exp > 0:
        stop_responded_before = tnow - datetime.timedelta(seconds=exp)
    else:
        # Earliest representable time: no ping can be older than this.
        stop_responded_before = tmin

    for c in BaseContainer.api_containers(allcontainers=True):
        cid = c['Id']
        cont = APIContainer(cid)
        cname = cont.get_name()
        api_name = cont.get_api_name()
        APIContainer.log_debug("examining container %s (%s), api:%r", cid, cname, api_name)
        if api_name is None:
            continue

        c_is_active = cont.is_running() or cont.is_restarting()
        if not c_is_active:
            cont.delete()
            continue

        APIContainer.register_api_container(api_name, cname)
        last_ping = APIContainer._get_last_ping(cname)
        unresponsive = (last_ping is not None) and (last_ping < stop_responded_before)
        if unresponsive:
            APIContainer.log_warn("Terminating possibly unresponsive container %s.", cont.debug_str())
            cont.kill()
            cont.delete()
            # A terminated container must not be counted as existing;
            # leaving it out lets the pruning below drop its ping record
            # and its registration.
            continue

        all_cnames[cname] = cid

    # delete ping entries for non existent containers
    # (list() takes a snapshot: the dict is modified inside the loop)
    for cname in list(APIContainer.PINGS.keys()):
        if cname not in all_cnames:
            del APIContainer.PINGS[cname]

    # delete non existent containers from container list; the lists are
    # filtered in place and emptied API names are removed in a second step
    # so the dict keeps its size while being iterated
    dellist = []
    for (api_name, clist) in APIContainer.API_CONTAINERS.items():
        clist[:] = [x for x in clist if x in all_cnames]
        if not clist:
            dellist.append(api_name)

    for api_name in dellist:
        del APIContainer.API_CONTAINERS[api_name]
def maintain(max_timeout=0, inactive_timeout=0):
    """Run one maintenance pass over all session containers.

    For every container returned by ``BaseContainer.session_containers``:

    * containers without a name are logged and ignored;
    * an active (running or restarting) container with no ping record gets
      one recorded;
    * a container started more than ``max_timeout`` seconds ago, or an
      active container whose last ping is more than ``inactive_timeout``
      seconds old, is invalidated and scheduled for backup and cleanup;
    * otherwise, a container that is not active and finished more than ten
      minutes ago is deleted without backup and dropped from this pass's
      bookkeeping.

    Afterwards ping records of containers that are no longer tracked are
    removed, ``SessContainer.VALID_CONTAINERS`` is replaced by the fresh
    name -> id mapping and ``VolMgr`` is asked to refresh disk usage for
    the remaining container ids.

    :param max_timeout: maximum allowed run time in seconds. With a value
        <= 0 the cut-off is the earliest representable time, which
        effectively turns the check off.
    :param inactive_timeout: maximum allowed time since the last ping in
        seconds. A value <= 0 effectively turns the check off.
    """
    SessContainer.log_info("Starting container maintenance...")
    tnow = datetime.datetime.now(pytz.utc)
    tmin = datetime.datetime(datetime.MINYEAR, 1, 1, tzinfo=pytz.utc)

    stop_before = (tnow - datetime.timedelta(seconds=max_timeout)) if (max_timeout > 0) else tmin
    stop_inactive_before = (tnow - datetime.timedelta(seconds=inactive_timeout)) if (inactive_timeout > 0) else tmin

    all_containers = BaseContainer.session_containers(allcontainers=True)

    all_cnames = {}
    container_id_list = []
    for cdesc in all_containers:
        cid = cdesc['Id']
        cont = SessContainer(cid)
        container_id_list.append(cid)
        cname = cont.get_name()

        if cname is None:
            SessContainer.log_debug("Ignoring %s", cont.debug_str())
            continue

        all_cnames[cname] = cid

        c_is_active = cont.is_running() or cont.is_restarting()
        last_ping = SessContainer._get_last_ping(cname)

        # if we don't have a ping record, create one (we must have restarted)
        if (last_ping is None) and c_is_active:
            SessContainer.log_info("Discovered new container %s", cont.debug_str())
            SessContainer.record_ping(cname)

        start_time = cont.time_started()
        # A start time more than a year in the past is treated as bogus
        # (indicates a container that is still starting up).
        start_time_not_zero = (tnow - start_time).total_seconds() < (365 * 24 * 60 * 60)

        if (start_time < stop_before) and start_time_not_zero:
            # don't allow running beyond the limit for long running sessions
            SessContainer.log_warn("Running beyond allowed time %s. Scheduling cleanup.", cont.debug_str())
            SessContainer.invalidate_container(cont.get_name())
            JBoxAsyncJob.async_backup_and_cleanup(cont.dockid)
        elif (last_ping is not None) and c_is_active and (last_ping < stop_inactive_before):
            # if inactive for too long, stop it
            SessContainer.log_warn("Inactive beyond allowed time %s. Scheduling cleanup.", cont.debug_str())
            SessContainer.invalidate_container(cont.get_name())
            JBoxAsyncJob.async_backup_and_cleanup(cont.dockid)
        elif not c_is_active and ((tnow - cont.time_finished()).total_seconds() > (10 * 60)):
            # Stopped for more than ten minutes: remove it outright and
            # forget it for the rest of this pass.
            SessContainer.log_warn("Dead container %s. Deleting.", cont.debug_str())
            cont.delete(backup=False)
            del all_cnames[cname]
            container_id_list.remove(cid)

    # Delete ping entries for non-existent containers.  Iterate over a
    # snapshot of the keys because entries are removed inside the loop
    # (mutating a dict while iterating its key view fails on Python 3).
    for cname in list(SessContainer.PINGS.keys()):
        if cname not in all_cnames:
            del SessContainer.PINGS[cname]

    SessContainer.VALID_CONTAINERS = all_cnames
    VolMgr.refresh_disk_use_status(container_id_list=container_id_list)
    SessContainer.log_info("Finished container maintenance.")