def _start_daemon(d):
    """Fork a child process which runs the monitor for daemon ``d``.

    In the child, the ``Monitor`` class registered for ``d`` in
    DAEMON_IMPLEMENTATIONS is instantiated and run; the child then exits
    with status 0, or with status 1 if an exception escaped the monitor.
    In the parent, the child's pid is recorded in DAEMON_PIDS and logged.
    """
    child_pid = os.fork()

    if child_pid == 0:
        # We are the child: run the daemon and never fall through into the
        # parent's bookkeeping below.
        try:
            monitor = DAEMON_IMPLEMENTATIONS[d].Monitor(d)
            monitor.run()
            sys.exit(0)
        except Exception as e:
            util_general.ignore_exception('daemon creation', e)
            sys.exit(1)

    # Only the parent reaches this point.
    DAEMON_PIDS[child_pid] = d
    LOG.with_field('pid', child_pid).info('Started %s' % d)
def _compact_etcd(self):
    """Compact and then defragment etcd.

    Any exception is handed to util_general.ignore_exception() rather than
    being propagated to the caller.
    """
    try:
        # Compaction needs a target revision. We rewrite a marker key which
        # records when we last compacted, then read it back and use its
        # latest modification revision as the revision to compact to. Note
        # that a different client library is used here because our primary
        # library does not support compaction.
        client = etcd3.client()
        marker = json.dumps({'compacted_at': time.time()})
        client.put('/sf/compact', marker)

        _, metadata = client.get('/sf/compact')
        client.compact(metadata.mod_revision, physical=True)
        client.defragment()

        LOG.info('Compacted etcd')
    except Exception as e:
        util_general.ignore_exception('etcd compaction', e)
def run(self):
    """Publish node metrics until the exit event is set.

    Metrics are refreshed in two situations:

    * a job is dequeued from the '<node name>-metrics' queue and the last
      refresh was more than two seconds ago; or
    * more than config.SCHEDULER_CACHE_TIMEOUT seconds have passed since
      the last refresh.

    Each refresh updates one Gauge per metric and writes the full set of
    metrics to etcd with a 120 second TTL. Exceptions raised inside the
    loop are passed to util_general.ignore_exception() and the loop
    continues.
    """
    LOG.info('Starting')
    gauges = {
        'updated_at': Gauge('updated_at', 'The last time metrics were updated')
    }

    last_metrics = 0

    def update_metrics():
        # NOTE: this helper deliberately does not touch last_metrics; the
        # caller records the refresh time after each call.
        stats = _get_stats()
        for metric in stats:
            # Gauges are created lazily the first time a metric is seen.
            if metric not in gauges:
                gauges[metric] = Gauge(metric, '')
            gauges[metric].set(stats[metric])

        etcd.put(
            'metrics', config.NODE_NAME, None,
            {
                'fqdn': config.NODE_NAME,
                'timestamp': time.time(),
                'metrics': stats
            },
            ttl=120)
        gauges['updated_at'].set_to_current_time()

    while not self.exit.is_set():
        try:
            jobname, _ = etcd.dequeue('%s-metrics' % config.NODE_NAME)
            if jobname:
                # A queued job is a request for fresh metrics, but we rate
                # limit refreshes to at most one every two seconds.
                if time.time() - last_metrics > 2:
                    update_metrics()
                    last_metrics = time.time()
                etcd.resolve('%s-metrics' % config.NODE_NAME, jobname)
            else:
                self.exit.wait(0.2)

            # Periodic refresh, regardless of whether anyone asked.
            timer = time.time() - last_metrics
            if timer > config.SCHEDULER_CACHE_TIMEOUT:
                update_metrics()
                last_metrics = time.time()

        except Exception as e:
            util_general.ignore_exception('resource statistics', e)
def delete(self):
    """Tear down this instance and record its final state.

    In order, this: touches any image cache files backing the instance's
    disks, powers the instance off, removes its NVRAM file, decrements the
    reference count on its NVRAM template blob, undefines the domain,
    removes the instance directory, and deallocates the instance's ports.
    Failures while deleting the domain or the disks are passed to
    util_general.ignore_exception() and do not stop the remaining steps.

    The state ends up as STATE_ERROR if the current state ends with
    '-<STATE_ERROR>', otherwise STATE_DELETED.
    """
    # Mark files we used in the image cache as recently used so that they
    # linger a little for possible future users.
    for disk in self.block_devices.get('devices', []):
        if disk.get('blob_uuid'):
            cached_image_path = util_general.file_permutation_exists(
                os.path.join(config.STORAGE_PATH,
                             'image_cache', disk['blob_uuid']),
                ['iso', 'qcow2'])
            if cached_image_path:
                pathlib.Path(cached_image_path).touch(exist_ok=True)

    with util_general.RecordedOperation('delete domain', self):
        try:
            self.power_off()

            nvram_path = os.path.join(self.instance_path, 'nvram')
            if os.path.exists(nvram_path):
                os.unlink(nvram_path)
            if self.nvram_template:
                b = blob.Blob.from_db(self.nvram_template)
                # The template blob may no longer exist. Without this guard
                # the resulting exception would skip undefining the domain
                # below.
                if b:
                    b.ref_count_dec()

            inst = self._get_domain()
            if inst:
                inst.undefine()
        except Exception as e:
            util_general.ignore_exception(
                'instance delete domain %s' % self, e)

    with util_general.RecordedOperation('delete disks', self):
        try:
            if os.path.exists(self.instance_path):
                shutil.rmtree(self.instance_path)
        except Exception as e:
            util_general.ignore_exception(
                'instance delete disks %s' % self, e)

    self.deallocate_instance_ports()

    # An instance which was already heading to an error state stays in
    # error; everything else becomes deleted.
    if self.state.value.endswith('-%s' % self.STATE_ERROR):
        self.state = self.STATE_ERROR
    else:
        self.state = self.STATE_DELETED
def run(self):
    """Dispatch queued work items for this node until asked to exit.

    Each pass reaps finished workers. While the exit event is unset, one
    work item is dequeued for config.NODE_NAME and handed to handle(),
    pausing briefly when dequeue_work_item() returns a falsy value. Once
    the exit event is set, no new work is started and the method returns
    as soon as no workers remain. Exceptions are passed to
    util_general.ignore_exception() and the loop continues.
    """
    LOG.info('Starting')

    while True:
        try:
            self.reap_workers()

            if self.exit.is_set():
                # Shutting down: drain existing workers, start nothing new.
                outstanding = len(self.workers)
                if outstanding == 0:
                    return

                LOG.info('Waiting for %d workers to finish' % outstanding)
                self.exit.wait(0.2)
                continue

            dispatched = self.dequeue_work_item(config.NODE_NAME, handle)
            if not dispatched:
                self.exit.wait(0.2)

        except Exception as e:
            util_general.ignore_exception('queue worker', e)
def handle(jobname, workitem):
    """Execute each task in a dequeued work item, in order.

    jobname identifies the queue job and is used for logging, the process
    title, and resolving the job when we are done. workitem is a dict whose
    'tasks' entry lists the task objects to run.

    Image fetch, image resize, libvirt and instance exceptions (and any
    other exception) stop processing of the remaining tasks and, if an
    instance is associated with the work item, enqueue deletion of that
    instance due to error. The job is always resolved on config.NODE_NAME,
    whether or not processing succeeded.
    """
    libvirt = util_libvirt.get_libvirt()

    log = LOG.with_field('workitem', jobname)
    log.info('Processing workitem')

    setproctitle.setproctitle(
        '%s-%s' % (daemon.process_name('queues'), jobname))

    inst = None
    task = None
    try:
        for task in workitem.get('tasks', []):
            if not isinstance(task, QueueTask):
                raise exceptions.UnknownTaskException(
                    'Task was not decoded: %s' % task)

            if isinstance(task, InstanceTask):
                inst = instance.Instance.from_db(task.instance_uuid())
                if not inst:
                    raise exceptions.InstanceNotInDBException(
                        task.instance_uuid())

            if isinstance(task, FetchImageTask):
                inst = instance.Instance.from_db(task.instance_uuid())

            if isinstance(task, SnapshotTask):
                inst = instance.Instance.from_db(task.instance_uuid())

            if inst:
                log_i = log.with_instance(inst)
            else:
                log_i = log

            log_i.with_field('task_name', task.name()).info('Starting task')

            # TODO(andy) Should network events also come through here eventually?
            # Then this can be generalised to record events on networks/instances

            # TODO(andy) This event should be recorded when it is recorded as
            # dequeued in the DB. Currently it's reporting action on the item
            # and calling it 'dequeue'.

            if inst:
                # TODO(andy) move to QueueTask
                db.add_event('instance', inst.uuid, task.pretty_task_name(),
                             'dequeued', None, 'Work item %s' % jobname)

            if isinstance(task, FetchImageTask):
                image_fetch(task.url(), inst)

            elif isinstance(task, PreflightInstanceTask):
                if (inst.state.value == dbo.STATE_DELETED or
                        inst.state.value.endswith('-error')):
                    log_i.warning(
                        'You cannot preflight an instance in state %s, skipping task'
                        % inst.state.value)
                    continue

                redirect_to = instance_preflight(inst, task.network())
                if redirect_to:
                    # Hand the whole work item to the other node and stop
                    # processing it here.
                    log_i.info('Redirecting instance start to %s'
                               % redirect_to)
                    etcd.enqueue(redirect_to, workitem)
                    return

            elif isinstance(task, StartInstanceTask):
                if (inst.state.value == dbo.STATE_DELETED or
                        inst.state.value.endswith('-error')):
                    log_i.warning(
                        'You cannot start an instance in state %s, skipping task'
                        % inst.state.value)
                    continue

                instance_start(inst, task.network())
                etcd.enqueue('%s-metrics' % config.NODE_NAME, {})

            elif isinstance(task, DeleteInstanceTask):
                try:
                    instance_delete(inst)
                    etcd.enqueue('%s-metrics' % config.NODE_NAME, {})
                except Exception as e:
                    util_general.ignore_exception(
                        'instance %s delete task' % inst, e)

            elif isinstance(task, FloatNetworkInterfaceTask):
                # Just punt it to the network node now that the interface is ready
                etcd.enqueue('networknode', task)

            elif isinstance(task, SnapshotTask):
                snapshot(inst, task.disk(), task.artifact_uuid(),
                         task.blob_uuid())

            elif isinstance(task, DeleteNetworkWhenClean):
                # Check if any interfaces remain on network
                task_network = net.Network.from_db(task.network_uuid())
                ifaces = networkinterface.interfaces_for_network(task_network)
                cur_interfaces = {i.uuid: i for i in ifaces}
                if cur_interfaces:
                    LOG.with_network(task_network).error(
                        'During DeleteNetworkWhenClean new interfaces have '
                        'connected to network: %s', cur_interfaces)

                # Only check those present at delete task initiation time.
                remain_interfaces = list(
                    set(task.wait_interfaces()) & set(cur_interfaces))
                if remain_interfaces:
                    # Queue task on a node with a remaining instance. This
                    # lookup must not rebind ``inst``: the error handlers
                    # and the finally clause below act on ``inst``, and the
                    # instance found here is unrelated to this work item.
                    first_iface = cur_interfaces[remain_interfaces[0]]
                    remaining_inst = instance.Instance.from_db(
                        first_iface.instance_uuid)
                    etcd.enqueue(
                        remaining_inst.placement['node'],
                        {
                            'tasks': [
                                DeleteNetworkWhenClean(task.network_uuid(),
                                                       remain_interfaces)
                            ]
                        },
                        delay=60)
                else:
                    # All original instances deleted, safe to delete network
                    etcd.enqueue('networknode',
                                 DestroyNetworkTask(task.network_uuid()))

            elif isinstance(task, HypervisorDestroyNetworkTask):
                n = net.Network.from_db(task.network_uuid())
                n.delete_on_hypervisor()

            elif isinstance(task, FetchBlobTask):
                metrics = etcd.get('metrics', config.NODE_NAME, None)
                if metrics:
                    metrics = metrics.get('metrics', {})
                else:
                    metrics = {}

                b = blob.Blob.from_db(task.blob_uuid())
                if not b:
                    log.with_fields({
                        'blob': task.blob_uuid()
                    }).info('Cannot replicate blob, not found')

                elif (int(metrics.get('disk_free_blobs', 0)) - int(b.size) <
                        config.MINIMUM_FREE_DISK):
                    # Replicating would leave less than the configured
                    # minimum free disk.
                    log.with_fields({
                        'blob': task.blob_uuid()
                    }).info('Cannot replicate blob, insufficient space')

                else:
                    log.with_object(b).info('Replicating blob')
                    size = b.ensure_local([])
                    log.with_object(b).with_fields({
                        'transferred': size,
                        'expected': b.size
                    }).info('Replicating blob complete')

            else:
                log_i.with_field('task', task).error(
                    'Unhandled task - dropped')

            log_i.info('Task complete')

    except exceptions.ImageFetchTaskFailedException as e:
        # Usually caused by external issue and not an application error
        log.info('Fetch Image Error: %s', e)
        if inst:
            inst.enqueue_delete_due_error('Image fetch failed: %s' % e)

    except exceptions.ImagesCannotShrinkException as e:
        log.info('Fetch Resize Error: %s', e)
        if inst:
            inst.enqueue_delete_due_error('Image resize failed: %s' % e)

    except libvirt.libvirtError as e:
        log.info('Libvirt Error: %s', e)
        if inst:
            inst.enqueue_delete_due_error('Instance task failed: %s' % e)

    except exceptions.InstanceException as e:
        log.info('Instance Error: %s', e)
        if inst:
            inst.enqueue_delete_due_error('Instance task failed: %s' % e)

    except Exception as e:
        # Logging ignored exception - this should be investigated
        util_general.ignore_exception('queue worker', e)
        if inst:
            inst.enqueue_delete_due_error('Failed queue task: %s' % e)

    finally:
        # Runs on success, on error, and on the early return for redirects.
        etcd.resolve(config.NODE_NAME, jobname)
        if inst:
            inst.add_event('tasks complete', 'dequeued',
                           msg='Work item %s' % jobname)
        log.info('Completed workitem')
def _get_stats():
    """Collect resource statistics for this node.

    Returns a dict mapping metric name to value, covering: node role flags,
    CPU counts and load averages, system and libvirt memory (in MB), KSM
    counters, per-path free disk space and overall disk totals, disk and
    network I/O byte counters, totals across active libvirt domains, and
    queue lengths for this node (plus the 'networknode' queue when this
    node is the network node).
    """
    libvirt = util_libvirt.get_libvirt()
    conn = libvirt.open('qemu:///system')

    try:
        # What's special about this node?
        retval = {
            'is_etcd_master': config.NODE_IS_ETCD_MASTER,
            'is_hypervisor': config.NODE_IS_HYPERVISOR,
            'is_network_node': config.NODE_IS_NETWORK_NODE,
        }

        # CPU info
        present_cpus, _, available_cpus = conn.getCPUMap()
        retval.update({
            'cpu_max': present_cpus,
            'cpu_available': available_cpus,
        })

        retval['cpu_max_per_instance'] = conn.getMaxVcpus(None)

        # This is disabled as data we don't currently use
        # for i in range(present_cpus):
        #    per_cpu_stats = conn.getCPUStats(i)
        #    for key in per_cpu_stats:
        #        retval['cpu_core%d_%s' % (i, key)] = per_cpu_stats[key]

        try:
            load_1, load_5, load_15 = psutil.getloadavg()
            retval.update({
                'cpu_load_1': load_1,
                'cpu_load_5': load_5,
                'cpu_load_15': load_15,
            })
        except Exception as e:
            util_general.ignore_exception('load average', e)

        # System memory info, converting bytes to mb
        stats = psutil.virtual_memory()
        retval.update({
            'memory_max': stats.total // 1024 // 1024,
            'memory_available': stats.available // 1024 // 1024
        })

        # libvirt memory info, converting kb to mb
        memory_status = conn.getMemoryStats(
            libvirt.VIR_NODE_MEMORY_STATS_ALL_CELLS)
        retval.update({
            'memory_max_libvirt': memory_status['total'] // 1024,
            'memory_available_libvirt': memory_status['free'] // 1024,
        })

        # Kernel Shared Memory (KSM) information
        ksm_details = {}
        for ent in os.listdir('/sys/kernel/mm/ksm'):
            with open('/sys/kernel/mm/ksm/%s' % ent) as f:
                ksm_details['memory_ksm_%s' % ent] = int(f.read().rstrip())
        retval.update(ksm_details)

        # Disk info. There could be more than one filesystem here, so we
        # track all of the paths we're fond of.
        seen_fsids = set()
        minimum = -1
        total = 0
        used = 0

        for path in ['', 'blobs', 'image_cache', 'instances', 'uploads']:
            # We need to make the paths we check if they don't exist,
            # otherwise they wont be included in the metrics and things get
            # confused.
            fullpath = os.path.join(config.STORAGE_PATH, path)
            os.makedirs(fullpath, exist_ok=True)
            s = os.statvfs(fullpath)
            free = s.f_frsize * s.f_bavail

            # Count each filesystem's capacity only once, even when several
            # of our paths live on it.
            if s.f_fsid not in seen_fsids:
                seen_fsids.add(s.f_fsid)
                total += s.f_frsize * s.f_blocks
                used += s.f_frsize * (s.f_blocks - s.f_bfree)

            # Overall free space is that of the fullest filesystem we use.
            if minimum == -1 or free < minimum:
                minimum = free

            if path == '':
                path = 'sfroot'
            retval['disk_free_%s' % path] = free

        retval.update({
            'disk_total': total,
            'disk_free': minimum,
            'disk_used': used
        })

        disk_counters = psutil.disk_io_counters()
        retval.update({
            'disk_read_bytes': disk_counters.read_bytes,
            'disk_write_bytes': disk_counters.write_bytes,
        })

        # Network info
        net_counters = psutil.net_io_counters()
        retval.update({
            'network_read_bytes': net_counters.bytes_recv,
            'network_write_bytes': net_counters.bytes_sent,
        })

        # Virtual machine consumption info
        total_instances = 0
        total_active_instances = 0
        total_instance_max_memory = 0
        total_instance_actual_memory = 0
        total_instance_vcpus = 0
        total_instance_cpu_time = 0

        for guest in conn.listAllDomains():
            try:
                active = guest.isActive() == 1
                if active:
                    _, maxmem, mem, cpus, cpu_time = guest.info()

            except libvirt.libvirtError as e:
                LOG.debug('During resource calc ignored libvirt error: %s' % e)
                active = False

            # NOTE(review): only active domains are counted, so
            # instances_total always equals instances_active here -- confirm
            # whether that is intended.
            if active:
                total_instances += 1
                total_active_instances += 1
                total_instance_max_memory += maxmem
                total_instance_actual_memory += mem
                total_instance_vcpus += cpus
                total_instance_cpu_time += cpu_time

        # Queue health statistics
        node_queue_processing, node_queue_waiting = etcd.get_queue_length(
            config.NODE_NAME)

        retval.update({
            'cpu_total_instance_vcpus': total_instance_vcpus,
            'cpu_total_instance_cpu_time': total_instance_cpu_time,
            'memory_total_instance_max': total_instance_max_memory // 1024,
            'memory_total_instance_actual':
                total_instance_actual_memory // 1024,
            'instances_total': total_instances,
            'instances_active': total_active_instances,
            'node_queue_processing': node_queue_processing,
            'node_queue_waiting': node_queue_waiting,
        })

        if config.NODE_IS_NETWORK_NODE:
            network_queue_processing, network_queue_waiting = \
                etcd.get_queue_length('networknode')

            retval.update({
                'network_queue_processing': network_queue_processing,
                'network_queue_waiting': network_queue_waiting,
            })

        return retval

    finally:
        # Release the hypervisor connection explicitly instead of relying
        # on garbage collection; this function is called repeatedly.
        conn.close()
def run(self):
    """Supervise network worker processes until asked to exit.

    Each pass reaps finished workers. While the exit event is unset:

    * on the network node, a 'net-worker' is (re)started whenever the
      previous one is no longer among the running workers;
    * every 30 seconds, management workers (stray interface removal,
      network maintenance, MTU validation and, on the network node,
      floating IP reaping) are started unless the previous worker of that
      kind is still running.

    Once the exit event is set, no new workers are started; a notice is
    logged at most every five seconds while workers remain, and the method
    returns when none are left. Exceptions are passed to
    util_general.ignore_exception() and the loop continues.
    """
    LOG.info('Starting')
    last_management = 0
    last_shutdown_notification = 0

    # Identifiers returned by start_workitem() for the most recently
    # started worker of each kind; compared against running worker pids.
    network_worker = None
    stray_interface_worker = None
    maintain_networks_worker = None
    floating_ip_reap_worker = None
    mtu_validation_worker = None

    while True:
        try:
            self.reap_workers()

            if not self.exit.is_set():
                worker_pids = [w.pid for w in self.workers]

                if (config.NODE_IS_NETWORK_NODE and
                        network_worker not in worker_pids):
                    network_worker = self.start_workitem(
                        self._process_network_node_workitems, [],
                        'net-worker')

                if time.time() - last_management > 30:
                    # Management tasks are treated as extra workers, and run
                    # in parallel with other network work items.
                    if stray_interface_worker not in worker_pids:
                        LOG.info('Scanning for stray network interfaces')
                        stray_interface_worker = self.start_workitem(
                            self._remove_stray_interfaces, [], 'stray-nics')

                    if maintain_networks_worker not in worker_pids:
                        LOG.info('Maintaining existing networks')
                        maintain_networks_worker = self.start_workitem(
                            self._maintain_networks, [], 'maintain')

                    if mtu_validation_worker not in worker_pids:
                        LOG.info('Validating network interface MTUs')
                        mtu_validation_worker = self.start_workitem(
                            self._validate_mtus, [], 'mtus')

                    if config.NODE_IS_NETWORK_NODE:
                        # Only log when a reaper is actually started, as
                        # for the other management tasks above.
                        if floating_ip_reap_worker not in worker_pids:
                            LOG.info('Reaping stray floating IPs')
                            floating_ip_reap_worker = self.start_workitem(
                                self._reap_leaked_floating_ips, [],
                                'fip-reaper')

                    last_management = time.time()

            elif len(self.workers) > 0:
                # Shutting down: rate limit the progress message.
                if time.time() - last_shutdown_notification > 5:
                    LOG.info('Waiting for %d workers to finish'
                             % len(self.workers))
                    last_shutdown_notification = time.time()

            else:
                return

            self.exit.wait(0.2)

        except Exception as e:
            util_general.ignore_exception('network worker', e)
def restore_instances():
    """Recreate this node's networks and instances on the hypervisor.

    Instances selected by instance.this_node_filter and
    instance.healthy_states_filter are examined; any network interfaces
    found for an instance but missing from its recorded interface list are
    added to that list. Every network used by those interfaces which is not
    dead is then created on the hypervisor and has its mesh ensured. After
    that, each instance whose power state is 'on', 'transition-to-on',
    the initial state or 'unknown' is created on the hypervisor while
    holding the instance lock.

    Failures restoring a network are logged and ignored. A failure restoring
    an instance is logged and the instance is queued for deletion due to
    error.
    """
    # Ensure all instances for this node are defined and have up to date data.
    networks = []
    instances = []
    for inst in instance.Instances([instance.this_node_filter,
                                    instance.healthy_states_filter]):
        instance_problems = []
        inst_interfaces = inst.interfaces or []

        updated_interfaces = False
        for ni in interfaces_for_instance(inst):
            # A list (not a set) is used so that networks are restored in
            # the order in which they were first seen.
            if ni.network_uuid not in networks:
                networks.append(ni.network_uuid)
            if ni.uuid not in inst_interfaces:
                inst_interfaces.append(ni.uuid)
                updated_interfaces = True

        # We do not need a lock here because this loop only runs on the node
        # with the instance, and interfaces don't change post instance
        # creation.
        if updated_interfaces:
            inst.interfaces = inst_interfaces

        # TODO(mikal): do better here.
        # for disk in inst.disk_spec:
        #     if disk.get('base'):
        #         img = images.Image.new(disk['base'])
        #         # NOTE(mikal): this check isn't great -- it checks for the original
        #         # downloaded image, not the post transcode version
        #         if (img.state in [dbo.STATE_DELETED, dbo.STATE_ERROR] or
        #                 not os.path.exists(img.version_image_path())):
        #             instance_problems.append(
        #                 '%s missing from image cache' % disk['base'])
        #             img.delete()

        if instance_problems:
            inst.enqueue_delete_due_error(
                'instance bad on startup: %s' % '; '.join(instance_problems))
        else:
            instances.append(inst)

    with util_general.RecordedOperation('restore networks', None):
        for network in networks:
            try:
                n = net.Network.from_db(network)
                if not n.is_dead():
                    LOG.with_object(n).info('Restoring network')
                    n.create_on_hypervisor()
                    n.ensure_mesh()
            except Exception as e:
                util_general.ignore_exception(
                    'restore network %s' % network, e)

    with util_general.RecordedOperation('restore instances', None):
        for inst in instances:
            try:
                with inst.get_lock(ttl=120, timeout=120,
                                   op='Instance restore'):
                    started = ['on', 'transition-to-on',
                               instance.Instance.STATE_INITIAL, 'unknown']
                    if inst.power_state not in started:
                        continue

                    LOG.with_object(inst).info('Restoring instance')
                    inst.create_on_hypervisor()
            except Exception as e:
                util_general.ignore_exception(
                    'restore instance %s' % inst, e)
                # Call the method on the instance itself, as every other
                # caller does, so that a failure here does not abort the
                # restore of the remaining instances.
                inst.enqueue_delete_due_error(
                    'exception while restoring instance on daemon restart')