def __init__(self,
             vm_name,
             output_path,
             exp_config: DictConfig,
             host_config: DictConfig,
             application: application.Application,
             benchmark: cloudexp.guest.application.benchmark.Benchmark,
             guest_mom_config: Union[DictConfig, dict, None] = None,
             shared_terminable: Terminable = None,
             shared_deferred_start: DeferredStartThread = None,
             **props):
    """Set up an experiment machine around one application and benchmark.

    :param vm_name: Name of the VM; forwarded to ``GuestMachine.__init__``.
    :param output_path: Stored as ``self.output_path``.
    :param exp_config: Experiment configuration; the guest username and
        password are read from its ``guest-credentials`` section.
    :param host_config: Host configuration; supplies the libvirt URI
        (``main`` / ``libvirt-hypervisor-uri``) and the guest client port
        (``guest-client`` / ``port``).
    :param application: Application object; provides the image name and
        receives the guest server port.
    :param benchmark: Benchmark object; it is bound to this machine and
        handed to the ``BenchmarkExecutor``.
    :param guest_mom_config: Optional overrides layered on top of
        ``momguestd.DEFAULT_CONFIG``.
    :param shared_terminable: Forwarded to ``GuestMachine.__init__`` and to
        the ``BenchmarkExecutor``.
    :param shared_deferred_start: Forwarded to the ``BenchmarkExecutor``.
    :param props: Extra properties. They are forwarded as a whole to
        ``GuestMachine.__init__``; ``load_func``, ``load_interval`` and
        ``expected_load_rounds`` are additionally read here.
    """
    # Everything the base class needs is gathered before it is initialised.
    libvirt_uri = host_config.get('main', 'libvirt-hypervisor-uri')
    username = exp_config.get('guest-credentials', 'username')
    password = exp_config.get('guest-credentials', 'password')
    image_name = application.get_image_name()
    GuestMachine.__init__(self, vm_name, libvirt_uri, username, password,
                          image_name,
                          shared_terminable=shared_terminable,
                          **props)

    # Plain references to the collaborators and configuration.
    self.output_path = output_path
    self.exp_config = exp_config
    self.host_conf = host_config
    self.guest_config = DictConfig(momguestd.DEFAULT_CONFIG,
                                   guest_mom_config)
    self.application = application
    self.benchmark = benchmark

    # Optional load parameters; each is None when not supplied in props.
    self.load_func = props.get('load_func')
    self.load_interval = props.get('load_interval')
    self.expected_load_rounds = props.get("expected_load_rounds")

    # Wire the benchmark and the application to this machine.
    self.benchmark.set_guest_machine(self)
    guest_server_port = self.host_conf.get("guest-client", "port")
    self.application.set_guest_server_port(guest_server_port)

    self.guest_client = load_guest_client(self.ip,
                                          self.vm_name,
                                          self.host_conf,
                                          base_name='exp-machine-client')

    # The executor is only constructed here; it is not started.
    self.benchmark_thread = BenchmarkExecutor(
        self.benchmark,
        self.load_func,
        self.load_interval,
        self.expected_load_rounds,
        name=self.vm_name,
        shared_terminable=shared_terminable,
        shared_deferred_start=shared_deferred_start)

    # Registries that start out empty, and the readiness flag (unset).
    self.threads = {}
    self.ssh_connections = {}
    self._is_ready_for_experiment = Event()
class MomHostDaemon(Terminable):
    """Host daemon that builds, starts and supervises the host components.

    One libvirt interface is created and shared by a ``GuestManager``, a
    ``HostMonitor`` and a ``HostPolicy``; those three objects form
    ``self.threads`` and are started and joined by :meth:`run_mom`.
    """

    def __init__(self,
                 config: Union[DictConfig, dict, None] = None,
                 shared_terminable: Terminable = None):
        """Create the shared libvirt interface and the three components.

        :param config: Optional overrides layered on ``DEFAULT_CONFIG``.
        :param shared_terminable: Forwarded to ``Terminable.__init__`` and
            to every component created here.
        """
        Terminable.__init__(self, shared_terminable=shared_terminable)
        self.logger = logging.getLogger(self.__class__.__name__)
        self.config = DictConfig(DEFAULT_CONFIG, config)

        # A single libvirt connection is shared by all components.
        uri = self.config.get('main', 'libvirt-hypervisor-uri')
        self.libvirt_iface = LibvirtInterface(uri)

        self.guest_manager = GuestManager(
            self.config,
            libvirt_iface=self.libvirt_iface,
            shared_terminable=shared_terminable,
        )
        self.host_monitor = HostMonitor(
            self.config,
            libvirt_iface=self.libvirt_iface,
            shared_terminable=shared_terminable,
        )
        self.host_policy = HostPolicy(
            self.config,
            self.libvirt_iface,
            self.host_monitor,
            self.guest_manager,
            shared_terminable=shared_terminable,
        )

        self.threads = {
            self.guest_manager, self.host_policy, self.host_monitor
        }

    def run_mom(self):
        """Start the components, watch them, then terminate and join them.

        The loop wakes every ``main`` / ``check-loop-interval`` and stops
        as soon as ``should_run`` is false or ``threads_ok`` reports a
        problem with the components.
        """
        self.logger.info("Starting")
        for worker in self.threads:
            # Skip anything already running, and stop launching once
            # termination has been requested.
            if worker.is_alive() or not self.should_run:
                continue
            worker.start()

        check_interval = self.config.get('main', 'check-loop-interval')

        while self.should_run:
            self.terminable_sleep(check_interval)
            if threads_ok(self.threads):
                continue
            # Only warn when the stop was not requested by us.
            if self.should_run:
                self.logger.warning(
                    "One of the threads ended before it should. Terminating."
                )
            break

        self.terminate()
        for worker in self.threads:
            if worker.is_alive():
                worker.join()
        self.logger.info("Daemon ending")
class MomGuestDaemon(LoggedObject):
    """Guest daemon: a monitor, a configurable policy and a guest server.

    The policy class is looked up by name (``policy`` / ``policy`` in the
    configuration) inside ``guest_policy``.
    """

    def __init__(self,
                 config: Union[DictConfig, dict, None] = None,
                 guest_name=None):
        """Build the monitor, the policy and the server.

        :param config: Optional overrides layered on ``DEFAULT_CONFIG``.
        :param guest_name: Passed to ``LoggedObject.__init__`` and used as
            the monitor name and the server's guest name.
        """
        LoggedObject.__init__(self, guest_name)
        self.config = DictConfig(DEFAULT_CONFIG, config)

        # The monitor only collects data here; it is not run as a thread.
        self.monitor = Monitor(self.config, monitor_name=guest_name)

        # Resolve the policy class by its configured name and instantiate it.
        importer = ClassImporter(guest_policy)
        policy_name = self.config.get('policy', 'policy')
        policy_class = importer.get_class(policy_name)
        self.policy = policy_class(self.config)

        self.server = GuestServer(self.config,
                                  self.monitor,
                                  self.policy,
                                  guest_name=guest_name)

    def start(self):
        """Serve requests until the server stops, then shut it down."""
        server = self.server
        self.logger.info("Starting guest server, listening on %s:%s",
                         server.host, server.port)
        self.server.serve_forever()
        self.server.shutdown()
        self.logger.info("Ended")

    def terminate(self):
        """Ask the guest server to shut down."""
        self.server.shutdown()
def __init__(self,
             config: DictConfig,
             libvirt_iface,
             host_monitor: Monitor,
             guest_manager: GuestManager,
             shared_terminable: Terminable = None):
    """Initialise the policy thread from the ``policy`` config section.

    :param config: Configuration; the ``policy`` section supplies
        ``interval``, ``grace-period``, ``inquiry-timeout``, ``resources``
        and ``allocator``.
    :param libvirt_iface: Stored in ``self.properties`` only.
    :param host_monitor: Host monitor; stored on the instance and in
        ``self.properties``.
    :param guest_manager: Guest manager; stored on the instance and in
        ``self.properties``.
    :param shared_terminable: Forwarded to ``Terminable.__init__``.
    """
    LoggedThread.__init__(self, daemon=True)
    Terminable.__init__(self, shared_terminable=shared_terminable)

    self.policy_lock = threading.RLock()
    self.host_monitor = host_monitor
    self.guest_manager = guest_manager
    self.config = config

    # The interval is never below one; the other two timings are capped
    # at the interval.
    configured_interval = self.config.get('policy', 'interval')
    self.interval: float = max(configured_interval, 1.)
    configured_grace = self.config.get('policy', 'grace-period')
    self.grace_period: float = min(configured_grace, self.interval)
    configured_timeout = self.config.get('policy', 'inquiry-timeout')
    self.inquiry_timeout: float = min(configured_timeout, self.interval)

    # Resource names with surrounding whitespace removed.
    self.resources = [
        name.strip() for name in self.config.get('policy', 'resources')
    ]

    # The allocator class is resolved by name and built with the resources.
    allocator_importer = ClassImporter(allocators)
    allocator_class = allocator_importer.get_class(
        config.get('policy', 'allocator'))
    self.policy = allocator_class(self.resources)

    self.controllers = []
    self.properties = dict(
        libvirt_iface=libvirt_iface,
        host_monitor=host_monitor,
        guest_manager=guest_manager,
        config=config,
    )
    self.policy_data_loggers = {}
    self.client_executor = None

    # Called last, after every attribute above has been assigned.
    self.get_controllers()
class ExpMachine(GuestMachine):
    """Guest machine that runs one application and benchmark per experiment.

    On top of ``GuestMachine`` this class holds the experiment and host
    configuration, a guest client, a ``BenchmarkExecutor`` and registries
    of the background threads and SSH connections it starts.
    """

    def __init__(self,
                 vm_name,
                 output_path,
                 exp_config: DictConfig,
                 host_config: DictConfig,
                 application: application.Application,
                 benchmark: cloudexp.guest.application.benchmark.Benchmark,
                 guest_mom_config: Union[DictConfig, dict, None] = None,
                 shared_terminable: Terminable = None,
                 shared_deferred_start: DeferredStartThread = None,
                 **props):
        """Set up the machine around one application and benchmark.

        :param vm_name: Name of the VM; forwarded to ``GuestMachine``.
        :param output_path: Directory used for the remote-call log files.
        :param exp_config: Experiment configuration; guest credentials are
            read from its ``guest-credentials`` section.
        :param host_config: Host configuration; supplies the libvirt URI
            and the guest client port.
        :param application: Application object; provides the image name
            and receives the guest server port.
        :param benchmark: Benchmark object; bound to this machine and run
            by the ``BenchmarkExecutor``.
        :param guest_mom_config: Optional overrides layered on
            ``momguestd.DEFAULT_CONFIG``.
        :param shared_terminable: Forwarded to ``GuestMachine`` and to the
            ``BenchmarkExecutor``.
        :param shared_deferred_start: Forwarded to the
            ``BenchmarkExecutor``.
        :param props: Extra properties, forwarded as a whole to
            ``GuestMachine``; ``load_func``, ``load_interval`` and
            ``expected_load_rounds`` are additionally read here.
        """
        libvirt_uri = host_config.get('main', 'libvirt-hypervisor-uri')
        username = exp_config.get('guest-credentials', 'username')
        password = exp_config.get('guest-credentials', 'password')
        GuestMachine.__init__(self, vm_name, libvirt_uri, username, password,
                              application.get_image_name(),
                              shared_terminable=shared_terminable,
                              **props)

        self.output_path = output_path
        self.exp_config = exp_config
        self.host_conf = host_config
        self.guest_config = DictConfig(momguestd.DEFAULT_CONFIG,
                                       guest_mom_config)

        self.application = application
        self.benchmark = benchmark

        # Optional load parameters; each is None when absent from props.
        self.load_func = props.get('load_func', None)
        self.load_interval = props.get('load_interval', None)
        self.expected_load_rounds = props.get("expected_load_rounds", None)

        self.benchmark.set_guest_machine(self)
        self.application.set_guest_server_port(
            self.host_conf.get("guest-client", "port"))

        self.guest_client = load_guest_client(self.ip,
                                              self.vm_name,
                                              self.host_conf,
                                              base_name='exp-machine-client')

        # Constructed here, started at the end of init_experiment().
        self.benchmark_thread = BenchmarkExecutor(
            self.benchmark,
            self.load_func,
            self.load_interval,
            self.expected_load_rounds,
            name=self.vm_name,
            shared_terminable=shared_terminable,
            shared_deferred_start=shared_deferred_start)

        self.threads = {}  # background threads, keyed by thread name
        self.ssh_connections = {}  # remote-call connections, keyed by name
        self._is_ready_for_experiment = Event()

    def as_dict(self):
        """Return the base-class dict extended with experiment fields."""
        return dict(
            GuestMachine.as_dict(self),
            load_interval=self.load_interval,
            bm_cpus=self.get_benchmark_cpus(),
            guest_config=self.guest_config.get_dict(),
        )

    def get_benchmark_cpu_count(self):
        """Return ``benchmark.get_required_cpus()``."""
        return self.benchmark.get_required_cpus()

    def set_benchmark_cpus(self, bm_cpus):
        """Forward ``bm_cpus`` to ``benchmark.set_benchmark_cpus``."""
        self.benchmark.set_benchmark_cpus(bm_cpus)

    def get_benchmark_cpus(self):
        """Return ``benchmark.get_benchmark_cpus()``."""
        return self.benchmark.get_benchmark_cpus()

    def init_experiment(self):
        """Bring the VM up, install everything and start the benchmark.

        The readiness flag is set only when every step succeeded.

        :raises Exception: Any failure is logged with its traceback and
            re-raised unchanged.
        """
        self.logger.info("Initiating experiment on VM...")
        try:
            self.start_domain(restart_if_activated=False)
            self.wait_for_ssh_server()
            self.set_guest_host_name()
            self.set_vm_props()
            self.log_vm_sysctl_properties()
            self.install_framework()
            self.install_application()
            self.disable_cron()
            self.drop_caches()
            self.start_guest_server()
            self.guest_client.wait_for_server(shared_terminable=self)
            self.start_application()
            self.benchmark.wait_for_application()

            # Start benchmark thread
            self.benchmark_thread.start()
        except Exception as e:
            self.logger.exception("Failed to initiate experiment: %s", e)
            # Bare raise keeps the original traceback intact.
            raise
        else:
            self._is_ready_for_experiment.set()

    @property
    def is_ready_for_experiment(self):
        """True once ``init_experiment`` has completed successfully."""
        return self._is_ready_for_experiment.is_set()

    def end_experiment(self):
        """Terminate the machine, the benchmark and the remote programs."""
        self.terminate()
        self.benchmark_thread.terminate()
        self.close_client()
        self._is_ready_for_experiment.clear()

        self.logger.info("Waiting for remote guest server to end...")
        self.join_guest_server(20)
        self.logger.info("Waiting for remote program to end...")
        self.join_application(20)

    def close_client(self):
        """Close the guest client if there is one."""
        if self.guest_client is not None:
            self.guest_client.close()

    def disable_cron(self):
        """Run ``service cron stop`` through ``self.ssh`` and log the result."""
        self.logger.info("Disabling Cron...")
        ssh_cron = self.ssh("service", "cron", "stop", name="disable-cron")
        ssh_cron.communicate()  # short blocking
        if not ssh_cron.err:
            self.logger.info("Cron disabled: %s",
                             ssh_cron.out.replace("\n", " ").strip())
        else:
            self.logger.error("Fail to disable cron: out=%s, err=%s",
                              ssh_cron.out, ssh_cron.err)

    def drop_caches(self):
        """Run ``sync`` and then write 3 to ``/proc/sys/vm/drop_caches``.

        Failures of either command are logged, not raised.
        """
        ssh_sync = self.ssh("sync", name="sync-cache")
        ssh_sync.communicate()  # short blocking
        if ssh_sync.err:
            self.logger.error("Fail to sync cache to secondary memory: %s",
                              ssh_sync.err)

        ssh_drop = self.ssh("echo 3 > /proc/sys/vm/drop_caches",
                            name="drop-cache")
        # Wait for the command before inspecting its output, as is done
        # for every other SSH call in this class; otherwise out/err are
        # read before the command has had a chance to produce them.
        ssh_drop.communicate()  # short blocking
        if ssh_drop.err:
            self.logger.error("Fail to drop caches: %s", ssh_drop.err)

        self.logger.info(
            "Dropped caches.%s%s",
            (" [sync: %s]" % ssh_sync.out) if ssh_sync.out else "",
            (" [drop: %s]" % ssh_drop.out) if ssh_drop.out else "")

    def start_background_thread(self,
                                target,
                                name_prefix=None,
                                args=(),
                                kwargs=None):
        """Start ``target`` in a daemon ``LoggedThread`` and register it.

        Does nothing when ``should_run`` is false. The thread is stored in
        ``self.threads`` under ``"<name_prefix>-<vm_name>"``. Failures to
        create or start the thread are logged, not raised.
        """
        if not self.should_run:
            return

        name = f"{name_prefix}-{self.vm_name}"
        self.logger.info("Starting background thread: %s", name)
        try:
            t = LoggedThread(target=target,
                             name=name,
                             args=args,
                             kwargs=kwargs,
                             daemon=True)
            self.threads[name] = t
            t.start()
        except Exception as e:
            self.logger.exception("Failed to initiated thread '%s': %s", name,
                                  e)

    def _remote_function_call_thread(self, name, remote_function: Callable,
                                     *args, **kwargs):
        """Run a remote function call and block until it ends.

        The connection is registered in ``self.ssh_connections[name]`` and
        its output file is ``<output_path>/<name>-<vm_name>.log``. If the
        call ends while ``should_run`` is still true, that is logged as a
        crash.
        """
        output_file = os.path.join(self.output_path,
                                   f'{name}-{self.vm_name}.log')
        conn = self.remote_function_call(remote_function,
                                         *args,
                                         output_file=output_file,
                                         **kwargs)
        self.ssh_connections[name] = conn
        out, err = conn.communicate()  # blocking
        if self.should_run:
            self.logger.error(
                "%s crashed before its time - [out]: %s, [err]: %s", name, out,
                err)

    def start_remote_function_call_thread(self, name,
                                          remote_function: Callable, *args,
                                          **kwargs):
        """Run ``_remote_function_call_thread`` in a background thread."""
        self.start_background_thread(self._remote_function_call_thread,
                                     name,
                                     args=(name, remote_function, *args),
                                     kwargs=kwargs)

    def start_application(self):
        """Start ``remote_application`` for this machine's application."""
        self.start_remote_function_call_thread(
            "application", remote_application, self.application,
            self.guest_config.get('logging', 'verbosity'))

    def start_guest_server(self):
        """Start ``remote_guest_server`` with the guest configuration."""
        self.start_remote_function_call_thread(
            "guest-server",
            remote_guest_server,
            self.guest_config.get('logging', 'verbosity'),
            self.guest_config,
            guest_name=self.vm_name)

    def terminate_ssh(self, ssh_name, timeout):
        """Terminate the named connection if it is alive and wait for it.

        :param ssh_name: Key in ``self.ssh_connections``; unknown names
            are ignored.
        :param timeout: Passed to the connection's ``join``.
        """
        ssh_thread = self.ssh_connections.get(ssh_name, None)
        if ssh_thread is None:
            return
        if ssh_thread.is_alive():
            ssh_thread.terminate()
            ssh_thread.join(timeout)

    def join_application(self, timeout=None):
        """Terminate and wait for the ``application`` connection."""
        self.terminate_ssh('application', timeout)

    def join_guest_server(self, timeout=None):
        """Terminate and wait for the ``guest-server`` connection."""
        self.terminate_ssh('guest-server', timeout)

    def install_framework(self):
        """Rsync the repository to the guest and run its setup script.

        :raises GuestMachineException: If the setup command reports
            anything on its error output.
        """
        self.logger.info("Copy and install Python's code...")
        # Compiled files, caches and dot-files are left out of the copy.
        exclude = '*.pyc', '*.pyo', '__pycache__', '.*'
        self.rsync_retry(cloudexp.repository_path,
                         '~',
                         name='framework',
                         exclude=exclude,
                         delete=True)
        args = ['guest_setup.py', 'develop', '--user']
        out, err = self.ssh(*args,
                            cwd=cloudexp.repository_name,
                            is_python=True,
                            is_module=False,
                            name="install-packages").communicate()
        if err:
            raise GuestMachineException(
                f"Could not install framework:\nOUT: {out}\nERR: {err}")

    def install_application(self):
        """Rsync the application's binaries to the guest, if it has any.

        ``application.get_bin_path()`` may return ``None`` (nothing to
        install), a single path string, or a list/tuple of paths.
        """
        bin_path = self.application.get_bin_path()
        if bin_path is None:
            self.logger.info(
                "Application does not require to install binaries...")
            return

        self.logger.info("Copy and install application's binaries...")
        # isinstance also accepts list/tuple subclasses as path collections.
        if not isinstance(bin_path, (list, tuple)):
            assert isinstance(bin_path,
                              str), "Binaries path must be a string."
            bin_path = (bin_path, )

        for p in bin_path:
            self.rsync_retry(p, '~', name='app-binaries', delete=True)