def start(cif, scheduler):
    """
    Create and start the "periodic" executor, then build and start all
    the periodic operations.

    cif is used to enumerate the VMs (cif.getVMs), to obtain the libvirt
    connection and to build the host monitor.
    scheduler is handed both to the executor and to every Operation.

    The module-level _executor and _operations are rebound here.
    """
    global _operations, _executor

    _executor = executor.Executor(
        name="periodic",
        workers_count=_WORKERS,
        max_tasks=_TASKS,
        scheduler=scheduler,
        max_workers=_MAX_WORKERS,
    )
    _executor.start()

    def vars_interval(option):
        # shortcut for the intervals read from the 'vars' config section
        return config.getint('vars', option)

    def per_vm_operation(monitor, interval):
        # run `monitor` against the VMs through the executor, using a
        # timeout derived from the interval
        dispatcher = VmDispatcher(
            cif.getVMs, _executor, monitor, _timeout_from(interval))
        return Operation(dispatcher, interval, scheduler)

    _operations = [
        # Updating the volume stats needs access to the storage, thus
        # can block: needs dispatching.
        per_vm_operation(
            UpdateVolumes,
            config.getint('irs', 'vol_size_sample_interval')),

        # Accesses FS and libvirt data: needs dispatching.
        # Ignored by new engine, has to be kept for BC sake.
        per_vm_operation(
            NumaInfoMonitor,
            vars_interval('vm_sample_numa_interval')),

        # Job monitoring needs QEMU monitor access.
        per_vm_operation(
            BlockjobMonitor,
            vars_interval('vm_sample_jobs_interval')),

        # libvirt sampling using bulk stats can block, but unresponsive
        # domains are handled inside VMBulkSampler for performance
        # reasons; thus, it does not need dispatching.
        Operation(
            sampling.VMBulkSampler(
                libvirtconnection.get(cif),
                cif.getVMs,
                sampling.stats_cache),
            vars_interval('vm_sample_interval'),
            scheduler),

        # Done only until we get high water mark notifications from
        # QEMU. It accesses storage and/or QEMU monitor, so it can
        # block: needs dispatching.
        per_vm_operation(
            DriveWatermarkMonitor,
            vars_interval('vm_watermark_interval')),

        Operation(
            sampling.HostMonitor(cif=cif),
            vars_interval('host_sample_stats_interval'),
            scheduler),

        Operation(
            containersconnection.monitor,
            vars_interval('vm_sample_interval'),
            scheduler),
    ]

    host.stats.start()

    for operation in _operations:
        operation.start()
def test_collect_fast_path_as_default(self):
    # With a connection that is not stuck, one dispatch of the sampler
    # is expected to record only the bulk 'getAllDomainStats' call.
    fake_conn = FakeConnection(vms=make_vms(num=3))
    stats_cache = FakeStatsCache()
    bulk_sampler = sampling.VMBulkSampler(
        fake_conn, fake_conn.getVMs, stats_cache)

    with stats_cache.await_completion(self.CALL_TIMEOUT):
        self.exc.dispatch(bulk_sampler, self.CALL_TIMEOUT)

    expected_calls = ['getAllDomainStats']
    self.assertCallSequence(fake_conn.__calls__, expected_calls)
def test_collect_slow_path_after_blocked(self):
    # Two dispatches while the connection is stuck: the first records
    # the bulk call, the second the per-domain-list call.
    fake_conn = FakeConnection(vms=make_vms(num=3))
    stats_cache = FakeStatsCache()
    bulk_sampler = sampling.VMBulkSampler(
        fake_conn, fake_conn.getVMs, stats_cache)

    stuck = fake_conn.stuck(self.TIMEOUT * 2)
    with stuck, stats_cache.await_completion(self.TIMEOUT):
        # only call #2 (slow) is expected to _complete_ while the
        # connection is stuck. Please note that a call is always
        # recorded, even if it does not complete.
        for _ in range(2):
            self.exc.dispatch(bulk_sampler, self.CALL_TIMEOUT)

    expected_calls = ['getAllDomainStats', 'domainListGetStats']
    self.assertCallSequence(fake_conn.__calls__, expected_calls)
def test_collect_vm_unresponsive(self):
    # Three dispatches while the connection is stuck, with VM '1'
    # flagged as not ready after the first one.
    domains = make_vms(num=3)
    fake_conn = FakeConnection(vms=domains)
    stats_cache = FakeStatsCache()
    bulk_sampler = sampling.VMBulkSampler(
        fake_conn, fake_conn.getVMs, stats_cache)

    stuck = fake_conn.stuck(self.TIMEOUT * 2)
    with stuck, stats_cache.await_completion(self.TIMEOUT, expected=2):
        # only call #2 (slow) and call #3 (slow) are expected
        # to _complete_, hence expected=2
        self.exc.dispatch(bulk_sampler, self.CALL_TIMEOUT)
        domains['1'].ready = False
        self.exc.dispatch(bulk_sampler, self.CALL_TIMEOUT)
        self.exc.dispatch(bulk_sampler, self.CALL_TIMEOUT)

    expected_calls = [
        'getAllDomainStats',
        'domainListGetStats',
        'domainListGetStats',
    ]
    self.assertCallSequence(fake_conn.__calls__, expected_calls)
def test_slow_collect_while_vm_unresponsive(self):
    # Dispatch twice while the connection is stuck and VM '1' is not
    # ready, wait for the pending first call to complete, then restore
    # the fake environment and check that one more dispatch records a
    # bulk call again.
    vms = make_vms(num=3)
    conn = FakeConnection(vms=vms)
    cache = FakeStatsCache()

    sampler = sampling.VMBulkSampler(conn, conn.getVMs, cache)

    with conn.stuck(self.TIMEOUT * 2):
        with cache.await_completion(self.TIMEOUT):
            self.exc.dispatch(sampler, self.CALL_TIMEOUT)
            vms['1'].ready = False
            self.exc.dispatch(sampler, self.CALL_TIMEOUT)
        # now we successfully waited for the second (slow) call:
        # call #1 (fast) recorded, not yet completed
        # call #2 (slow) recorded, completed, waited
        # now we need to be able to wait for the still pending call,
        # hence we re-prepare to wait
        cache.clear()

    # so we check indeed we recorded the right calls.
    # the call #1 (fast) may complete any moment, asynchronously
    expected = ['getAllDomainStats', 'domainListGetStats']
    self.assertCallSequence(conn.__calls__, expected)

    # now we make sure the call #1 (fast) is completed.
    # we expect NOT to wait here, timeout added just in case.
    # assertTrue is used instead of a bare assert so the check is not
    # stripped when running with python -O.
    self.assertTrue(cache.sync.wait(self.TIMEOUT))

    # reset fake environment to pristine state
    vms['1'].ready = True
    sampler._skip_doms.clear()

    expected.append('getAllDomainStats')
    with cache.await_completion(self.TIMEOUT):
        self.exc.dispatch(sampler, self.CALL_TIMEOUT)

    self.assertCallSequence(conn.__calls__, expected)