def copy_ovf_env(self):
    """
    Copy the ovf-env file from the DVD to the hard disk and remove the
    password before saving it to disk.
    """
    dvd_mount_point = conf.get_dvd_mount_point()
    ovf_file_path_on_dvd = os.path.join(dvd_mount_point, OVF_FILE_NAME)
    tag_file_path_on_dvd = os.path.join(dvd_mount_point, TAG_FILE_NAME)
    try:
        self.osutil.mount_dvd()

        ovfxml = fileutil.read_file(ovf_file_path_on_dvd, remove_bom=True)
        ovfenv = OvfEnv(ovfxml)
        ovfxml = re.sub("<UserPassword>.*?<", "<UserPassword>*<", ovfxml)
        ovf_file_path = os.path.join(conf.get_lib_dir(), OVF_FILE_NAME)
        fileutil.write_file(ovf_file_path, ovfxml)

        if os.path.isfile(tag_file_path_on_dvd):
            logger.info("Found {0} in provisioning ISO", TAG_FILE_NAME)
            tag_file_path = os.path.join(conf.get_lib_dir(), TAG_FILE_NAME)
            shutil.copyfile(tag_file_path_on_dvd, tag_file_path)

    except (OSUtilError, IOError) as e:
        raise ProtocolError(ustr(e))

    try:
        self.osutil.umount_dvd()
        self.osutil.eject_dvd()
    except OSUtilError as e:
        logger.warn(ustr(e))

    return ovfenv
def _write_pid_file(self):
    pid_files = self._get_pid_files()

    pid_dir, pid_name, pid_re = self._get_pid_parts()

    previous_pid_file = None if len(pid_files) <= 0 else pid_files[-1]
    pid_index = -1 \
        if previous_pid_file is None \
        else int(pid_re.match(os.path.basename(previous_pid_file)).group(1))
    pid_file = os.path.join(pid_dir, "{0}_{1}".format(pid_index + 1, pid_name))

    try:
        fileutil.write_file(pid_file, ustr(os.getpid()))
        logger.info(u"{0} running as process {1}", CURRENT_AGENT, ustr(os.getpid()))
    except Exception as e:
        logger.warn(
            u"Exception writing goal state agent {0} pid to {1}: {2}",
            CURRENT_AGENT,
            pid_file,
            ustr(e))
        pid_file = None

    return pid_files, pid_file
def run(self):
    # If provisioning is enabled, run the default provision handler
    if conf.get_provision_enabled():
        logger.warn("Provisioning flag is enabled; this is not typical "
                    "in Ubuntu, please ensure your config is correct.")
        super(UbuntuProvisionHandler, self).run()
        return

    provisioned = os.path.join(conf.get_lib_dir(), "provisioned")
    if os.path.isfile(provisioned):
        logger.info("Provisioning already completed, skipping.")
        return

    logger.info("Running Ubuntu provisioning handler")
    self.wait_for_ovfenv()
    self.protocol_util.get_protocol()
    self.report_not_ready("Provisioning", "Starting")
    try:
        thumbprint = self.wait_for_ssh_host_key()
        fileutil.write_file(provisioned, "")
        logger.info("Finished provisioning")
    except ProvisionError as e:
        logger.error("Provisioning failed: {0}", ustr(e))
        self.report_not_ready("ProvisioningFailed", ustr(e))
        self.report_event(ustr(e))
        return

    self.report_ready(thumbprint)
    self.report_event("Provisioning succeeded", is_success=True)
def run(self, child_args=None):
    logger.info("{0} Version:{1}", AGENT_LONG_NAME, AGENT_VERSION)
    logger.info("OS: {0} {1}", DISTRO_NAME, DISTRO_VERSION)
    logger.info("Python: {0}.{1}.{2}", PY_VERSION_MAJOR, PY_VERSION_MINOR, PY_VERSION_MICRO)

    self.check_pid()
    self.initialize_environment()

    CGroups.setup()

    # If FIPS is enabled, set the OpenSSL environment variable
    # Note:
    # -- Subprocesses inherit the current environment
    if conf.get_fips_enabled():
        os.environ[OPENSSL_FIPS_ENVIRONMENT] = '1'

    while self.running:
        try:
            self.daemon(child_args)
        except Exception as e:
            err_msg = traceback.format_exc()
            add_event(name=AGENT_NAME,
                      is_success=False,
                      message=ustr(err_msg),
                      op=WALAEventOperation.UnhandledError)
            logger.warn("Daemon ended with exception -- Sleep 15 seconds and restart daemon")
            time.sleep(15)
def remove_firewall(self, dst_ip=None, uid=None):
    # If a previous attempt failed, do not retry
    global _enable_firewall
    if not _enable_firewall:
        return False

    try:
        if dst_ip is None or uid is None:
            msg = "Missing arguments to remove_firewall"
            logger.warn(msg)
            raise Exception(msg)

        wait = self.get_firewall_will_wait()

        # This rule was <= 2.2.25 only, and may still exist on some VMs.
        # Until 2.2.25 has aged out, keep this cleanup in place.
        self._delete_rule(FIREWALL_DELETE_CONNTRACK_ACCEPT.format(wait, dst_ip))

        self._delete_rule(FIREWALL_DELETE_OWNER_ACCEPT.format(wait, dst_ip, uid))
        self._delete_rule(FIREWALL_DELETE_CONNTRACK_DROP.format(wait, dst_ip))

        return True

    except Exception as e:
        _enable_firewall = False
        logger.info("Unable to remove firewall -- "
                    "no further attempts will be made: "
                    "{0}".format(ustr(e)))
        return False
def _get_all_interfaces(self):
    """
    Return a dictionary mapping from interface name to IPv4 address.
    Interfaces without a name are ignored.
    """
    expected = 16  # how many devices should I expect...
    struct_size = DefaultOSUtil._get_struct_ifconf_size()
    array_size = expected * struct_size

    buff = array.array('B', b'\0' * array_size)
    param = struct.pack('iL', array_size, buff.buffer_info()[0])
    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM, socket.IPPROTO_UDP)
    ret = fcntl.ioctl(sock.fileno(), IOCTL_SIOCGIFCONF, param)
    retsize = (struct.unpack('iL', ret)[0])
    sock.close()

    if retsize == array_size:
        logger.warn(('SIOCGIFCONF returned more than {0} up '
                     'network interfaces.'), expected)

    ifconf_buff = buff.tostring()

    ifaces = {}
    for i in range(0, array_size, struct_size):
        iface = ifconf_buff[i:i + IFNAMSIZ].split(b'\0', 1)[0]
        if len(iface) > 0:
            iface_name = iface.decode('latin-1')
            if iface_name not in ifaces:
                ifaces[iface_name] = socket.inet_ntoa(ifconf_buff[i + 20:i + 24])
    return ifaces
def get_firewall_dropped_packets(self, dst_ip=None):
    # If a previous attempt failed, do not retry
    global _enable_firewall
    if not _enable_firewall:
        return 0

    try:
        wait = self.get_firewall_will_wait()

        rc, output = shellutil.run_get_output(FIREWALL_PACKETS.format(wait), log_cmd=False)
        if rc == 3:
            # Transient error that we ignore. This code fires every loop
            # of the daemon (60m), so we will get the value eventually.
            return 0

        if rc != 0:
            return -1

        pattern = re.compile(PACKET_PATTERN.format(dst_ip))
        for line in output.split('\n'):
            m = pattern.match(line)
            if m is not None:
                return int(m.group(1))

        return 0

    except Exception as e:
        _enable_firewall = False
        logger.warn("Unable to retrieve firewall packets dropped: "
                    "{0}".format(ustr(e)))
        return -1
def device_for_ide_port(self, port_id):
    """
    Return device name attached to ide port 'n'.
    """
    if port_id > 3:
        return None
    g0 = "00000000"
    if port_id > 1:
        g0 = "00000001"
        port_id = port_id - 2

    device = None
    path = "/sys/bus/vmbus/devices/"
    if os.path.exists(path):
        try:
            for vmbus in os.listdir(path):
                deviceid = fileutil.read_file(os.path.join(path, vmbus, "device_id"))
                guid = deviceid.lstrip('{').split('-')
                if guid[0] == g0 and guid[1] == "000" + ustr(port_id):
                    for root, dirs, files in os.walk(path + vmbus):
                        if root.endswith("/block"):
                            device = dirs[0]
                            break
                    else:
                        # older distros
                        for d in dirs:
                            if ':' in d and "block" == d.split(':')[0]:
                                device = d.split(':')[1]
                                break
                    break
        except OSError as oe:
            logger.warn('Could not obtain device for IDE port {0}: {1}', port_id, ustr(oe))
    return device
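# Illustrative usage (not part of the agent source): the Azure resource disk is conventionally
# attached to IDE port 1, which is how mount_resource_disk() further below locates it. The
# "osutil" parameter is assumed to be an already-constructed OS-util object that exposes the
# device_for_ide_port method defined above.
def _example_resource_disk_device(osutil):
    device = osutil.device_for_ide_port(1)  # e.g. "sdb", or None if the port has no device
    return None if device is None else "/dev/" + device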
def get_installed_version(self):
    latest_version = None

    for path in glob.iglob(os.path.join(conf.get_lib_dir(), self.ext_handler.name + "-*")):
        if not os.path.isdir(path):
            continue

        separator = path.rfind('-')
        version = FlexibleVersion(path[separator + 1:])

        existing_state = os.path.join(path, 'config', 'HandlerState')
        should_remove = False
        if not os.path.exists(existing_state):
            should_remove = True
        else:
            with open(existing_state) as fh:
                existing_state_text = fh.read()
            if existing_state_text is None or \
                    existing_state_text == ExtHandlerState.NotInstalled:
                should_remove = True

        if should_remove:
            logger.warn("Extension directory does not contain a valid "
                        "status, removing [{0}]".format(path))
            shutil.rmtree(path, ignore_errors=True)
            continue
        else:
            logger.verbose("Extension directory contains valid status "
                           "[{0}]".format(path))

        if latest_version is None or latest_version < version:
            latest_version = version

    return str(latest_version) if latest_version is not None else None
def run(self):
    # If provision is enabled, run default provision handler
    if conf.get_provision_enabled():
        logger.warn("Provisioning flag is enabled, which overrides using "
                    "cloud-init; running the default provisioning code")
        super(CloudInitProvisionHandler, self).run()
        return

    try:
        if super(CloudInitProvisionHandler, self).is_provisioned():
            logger.info("Provisioning already completed, skipping.")
            return

        utc_start = datetime.utcnow()
        logger.info("Running CloudInit provisioning handler")
        self.wait_for_ovfenv()
        self.protocol_util.get_protocol()
        self.report_not_ready("Provisioning", "Starting")

        thumbprint = self.wait_for_ssh_host_key()
        self.write_provisioned()
        logger.info("Finished provisioning")

        self.report_ready(thumbprint)
        self.report_event("Provisioning with cloud-init succeeded ({0}s)".format(self._get_uptime_seconds()),
                          is_success=True,
                          duration=elapsed_milliseconds(utc_start))

    except ProvisionError as e:
        msg = "Provisioning with cloud-init failed: {0} ({1}s)".format(ustr(e), self._get_uptime_seconds())
        logger.error(msg)
        self.report_not_ready("ProvisioningFailed", ustr(e))
        self.report_event(msg)
        return
def wait_for_ovfenv(self, max_retry=1800, sleep_time=1):
    """
    Wait for cloud-init to copy ovf-env.xml file from provision ISO
    """
    ovf_file_path = os.path.join(conf.get_lib_dir(), OVF_FILE_NAME)
    for retry in range(0, max_retry):
        if os.path.isfile(ovf_file_path):
            try:
                ovf_env = OvfEnv(fileutil.read_file(ovf_file_path))
                self.handle_provision_guest_agent(ovf_env.provision_guest_agent)
                return
            except ProtocolError as pe:
                raise ProvisionError("OVF xml could not be parsed "
                                     "[{0}]: {1}".format(ovf_file_path, ustr(pe)))
        else:
            if retry < max_retry - 1:
                logger.info(
                    "Waiting for cloud-init to copy ovf-env.xml to {0} "
                    "[{1} retries remaining, "
                    "sleeping {2}s]".format(ovf_file_path,
                                            max_retry - retry,
                                            sleep_time))
                if not self.validate_cloud_init():
                    logger.warn("cloud-init does not appear to be running")
                time.sleep(sleep_time)

    raise ProvisionError("Giving up, ovf-env.xml was not copied to {0} "
                         "after {1}s".format(ovf_file_path,
                                             max_retry * sleep_time))
def wait_for_ssh_host_key(self, max_retry=1800, sleep_time=1):
    """
    Wait for cloud-init to generate the ssh host key
    """
    keypair_type = conf.get_ssh_host_keypair_type()
    path = conf.get_ssh_key_public_path()
    for retry in range(0, max_retry):
        if os.path.isfile(path):
            logger.info("ssh host key found at: {0}".format(path))
            try:
                thumbprint = self.get_ssh_host_key_thumbprint(chk_err=False)
                logger.info("Thumbprint obtained from: {0}".format(path))
                return thumbprint
            except ProvisionError:
                logger.warn("Could not get thumbprint from {0}".format(path))
        if retry < max_retry - 1:
            logger.info("Waiting for ssh host key to be generated at {0} "
                        "[{1} attempts remaining, "
                        "sleeping {2}s]".format(path,
                                                max_retry - retry,
                                                sleep_time))
            if not self.validate_cloud_init():
                logger.warn("cloud-init does not appear to be running")
            time.sleep(sleep_time)

    raise ProvisionError("Giving up, ssh host key was not found at {0} "
                         "after {1}s".format(path,
                                             max_retry * sleep_time))
def download_ext_handler_pkg(self, uri, headers=None):
    try:
        resp = restutil.http_get(uri, chk_proxy=True, headers=headers)
        if resp.status == restutil.httpclient.OK:
            return resp.read()
    except Exception as e:
        logger.warn("Failed to download from: {0}. Error: {1}", uri, ustr(e))
def migrate_handler_state():
    handler_state_path = os.path.join(conf.get_lib_dir(), "handler_state")
    if not os.path.isdir(handler_state_path):
        return

    for handler_path in glob.iglob(os.path.join(handler_state_path, "*")):
        handler = os.path.basename(handler_path)
        handler_config_path = os.path.join(conf.get_lib_dir(), handler, "config")
        if os.path.isdir(handler_config_path):
            for file in ("State", "Status"):
                from_path = os.path.join(handler_state_path, handler, file.lower())
                to_path = os.path.join(handler_config_path, "Handler" + file)
                if os.path.isfile(from_path) and not os.path.isfile(to_path):
                    try:
                        shutil.move(from_path, to_path)
                    except Exception as e:
                        logger.warn(
                            "Exception occurred migrating {0} {1} file: {2}",
                            handler,
                            file,
                            str(e))

    try:
        shutil.rmtree(handler_state_path)
    except Exception as e:
        logger.warn("Exception occurred removing {0}: {1}", handler_state_path, str(e))
    return
def mount_dvd(self, max_retry=6, chk_err=True, dvd_device=None, mount_point=None, sleep_time=5):
    if dvd_device is None:
        dvd_device = self.get_dvd_device()
    if mount_point is None:
        mount_point = conf.get_dvd_mount_point()
    if not os.path.isdir(mount_point):
        os.makedirs(mount_point)

    for retry in range(0, max_retry):
        retcode = self.mount(dvd_device,
                             mount_point,
                             option="-o ro -t udf",
                             chk_err=False)
        if retcode == 0:
            logger.info("Successfully mounted DVD")
            return

        if retry < max_retry - 1:
            mountlist = shellutil.run_get_output("/sbin/mount")[1]
            existing = self.get_mount_point(mountlist, dvd_device)
            if existing is not None:
                logger.info("{0} is mounted at {1}", dvd_device, existing)
                return

            logger.warn("Mount DVD failed: retry={0}, ret={1}", retry, retcode)
            time.sleep(sleep_time)

    if chk_err:
        raise OSUtilError("Failed to mount DVD.")
def mount_resource_disk(self, mount_point, fs):
    device = self.osutil.device_for_ide_port(1)
    if device is None:
        raise ResourceDiskError("unable to detect disk topology")

    device = "/dev/" + device
    mountlist = shellutil.run_get_output("mount")[1]
    existing = self.osutil.get_mount_point(mountlist, device)

    if existing:
        logger.info("Resource disk {0}1 is already mounted", device)
        return existing

    fileutil.mkdir(mount_point, mode=0o755)

    logger.info("Detect GPT...")
    partition = device + "1"
    ret = shellutil.run_get_output("parted {0} print".format(device))
    if ret[0]:
        raise ResourceDiskError("({0}) {1}".format(device, ret[1]))

    if "gpt" in ret[1]:
        logger.info("GPT detected")
        logger.info("Get GPT partitions")
        parts = [x for x in ret[1].split("\n") if re.match(r"^\s*[0-9]+", x)]
        logger.info("Found {0} GPT partitions.", len(parts))
        if len(parts) > 1:
            logger.info("Remove old GPT partitions")
            for i in range(1, len(parts) + 1):
                logger.info("Remove partition: {0}", i)
                shellutil.run("parted {0} rm {1}".format(device, i))

            logger.info("Create a new GPT partition using entire disk space")
            shellutil.run("parted {0} mkpart primary 0% 100%".format(device))

            logger.info("Format partition: {0} with fstype {1}", partition, fs)
            shellutil.run("mkfs." + fs + " " + partition + " -F")
    else:
        logger.info("GPT not detected")
        logger.info("Check fstype")
        ret = shellutil.run_get_output("sfdisk -q -c {0} 1".format(device))
        if ret[1].rstrip() == "7" and fs != "ntfs":
            logger.info("The partition is formatted with ntfs")
            logger.info("Format partition: {0} with fstype {1}", partition, fs)
            shellutil.run("sfdisk -c {0} 1 83".format(device))
            shellutil.run("mkfs." + fs + " " + partition + " -F")

    logger.info("Mount resource disk")
    ret = shellutil.run("mount {0} {1}".format(partition, mount_point), chk_err=False)
    if ret:
        logger.warn("Failed to mount resource disk. Retry mounting")
        shellutil.run("mkfs." + fs + " " + partition + " -F")
        ret = shellutil.run("mount {0} {1}".format(partition, mount_point))
        if ret:
            raise ResourceDiskError("({0}) {1}".format(partition, ret))

    logger.info("Resource disk ({0}) is mounted at {1} with fstype {2}",
                device, mount_point, fs)
    return mount_point
def process(self):
    try:
        RDMADeviceHandler.update_dat_conf(dapl_config_paths, self.ipv4_addr)

        skip_rdma_device = False
        retcode, out = shellutil.run_get_output("modinfo hv_network_direct")
        if retcode == 0:
            version = re.search(r"version:\s+(\d+)\.(\d+)\.(\d+)\D", out, re.IGNORECASE)
            if version:
                v1 = int(version.groups(0)[0])
                v2 = int(version.groups(0)[1])
                if v1 > 4 or (v1 == 4 and v2 > 0):
                    logger.info("Skip setting /dev/hvnd_rdma on 4.1 or later")
                    skip_rdma_device = True
            else:
                logger.info("RDMA: hv_network_direct driver version not present, assuming 4.0.x or older.")
        else:
            logger.warn("RDMA: failed to get module info on hv_network_direct.")

        if not skip_rdma_device:
            RDMADeviceHandler.wait_rdma_device(
                self.rdma_dev, self.device_check_timeout_sec, self.device_check_interval_sec)
            RDMADeviceHandler.write_rdma_config_to_device(
                self.rdma_dev, self.ipv4_addr, self.mac_addr)

        RDMADeviceHandler.update_network_interface(self.mac_addr, self.ipv4_addr)
    except Exception as e:
        logger.error("RDMA: device processing failed: {0}".format(e))
def mount_dvd(self, max_retry=6, chk_err=True, dvd_device=None, mount_point=None):
    if dvd_device is None:
        dvd_device = self.get_dvd_device()
    if mount_point is None:
        mount_point = conf.get_dvd_mount_point()
    mountlist = shellutil.run_get_output("mount")[1]
    existing = self.get_mount_point(mountlist, dvd_device)
    if existing is not None:
        # Already mounted
        logger.info("{0} is already mounted at {1}", dvd_device, existing)
        return
    if not os.path.isdir(mount_point):
        os.makedirs(mount_point)

    for retry in range(0, max_retry):
        retcode = self.mount(dvd_device,
                             mount_point,
                             option="-o ro -t udf,iso9660",
                             chk_err=chk_err)
        if retcode == 0:
            logger.info("Successfully mounted dvd")
            return
        if retry < max_retry - 1:
            logger.warn("Mount dvd failed: retry={0}, ret={1}", retry, retcode)
            time.sleep(5)
    if chk_err:
        raise OSUtilError("Failed to mount dvd.")
def get_first_if(self):
    """
    Return the interface name, and ip addr of the
    first active non-loopback interface.
    """
    iface = ''
    expected = 16  # how many devices should I expect...
    struct_size = 40  # for 64bit the size is 40 bytes

    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM, socket.IPPROTO_UDP)
    buff = array.array('B', b'\0' * (expected * struct_size))
    param = struct.pack('iL', expected * struct_size, buff.buffer_info()[0])
    ret = fcntl.ioctl(sock.fileno(), 0x8912, param)
    retsize = (struct.unpack('iL', ret)[0])
    if retsize == (expected * struct_size):
        logger.warn(('SIOCGIFCONF returned more than {0} up '
                     'network interfaces.'), expected)
    sock = buff.tostring()

    primary = bytearray(self.get_primary_interface(), encoding='utf-8')
    for i in range(0, struct_size * expected, struct_size):
        iface = sock[i:i + 16].split(b'\0', 1)[0]
        if len(iface) == 0 or self.is_loopback(iface) or iface != primary:
            # test the next one
            if len(iface) != 0 and not self.disable_route_warning:
                logger.info('interface [{0}] skipped'.format(iface))
            continue
        else:
            # use this one
            logger.info('interface [{0}] selected'.format(iface))
            break

    return iface.decode('latin-1'), socket.inet_ntoa(sock[i + 20:i + 24])
def _purge_agents(self):
    """
    Remove from disk all directories and .zip files of unknown agents
    (without removing the current, running agent).
    """
    path = os.path.join(conf.get_lib_dir(), "{0}-*".format(AGENT_NAME))

    known_versions = [agent.version for agent in self.agents]
    if not is_current_agent_installed() and CURRENT_VERSION not in known_versions:
        logger.warn(
            u"Running Agent {0} was not found in the agent manifest - adding to list",
            CURRENT_VERSION)
        known_versions.append(CURRENT_VERSION)

    for agent_path in glob.iglob(path):
        try:
            name = fileutil.trim_ext(agent_path, "zip")
            m = AGENT_DIR_PATTERN.match(name)
            if m is not None and FlexibleVersion(m.group(1)) not in known_versions:
                if os.path.isfile(agent_path):
                    logger.info(u"Purging outdated Agent file {0}", agent_path)
                    os.remove(agent_path)
                else:
                    logger.info(u"Purging outdated Agent directory {0}", agent_path)
                    shutil.rmtree(agent_path)
        except Exception as e:
            logger.warn(u"Purging {0} raised exception: {1}", agent_path, ustr(e))
    return
def init_sysinfo(self):
    osversion = "{0}:{1}-{2}-{3}:{4}".format(platform.system(),
                                             DISTRO_NAME,
                                             DISTRO_VERSION,
                                             DISTRO_CODE_NAME,
                                             platform.release())
    self.sysinfo.append(TelemetryEventParam("OSVersion", osversion))
    self.sysinfo.append(TelemetryEventParam("GAVersion", CURRENT_AGENT))

    try:
        ram = self.osutil.get_total_mem()
        processors = self.osutil.get_processor_cores()
        self.sysinfo.append(TelemetryEventParam("RAM", ram))
        self.sysinfo.append(TelemetryEventParam("Processors", processors))
    except OSUtilError as e:
        logger.warn("Failed to get system info: {0}", e)

    try:
        protocol = self.protocol_util.get_protocol()
        vminfo = protocol.get_vminfo()
        self.sysinfo.append(TelemetryEventParam("VMName", vminfo.vmName))
        self.sysinfo.append(TelemetryEventParam("TenantName", vminfo.tenantName))
        self.sysinfo.append(TelemetryEventParam("RoleName", vminfo.roleName))
        self.sysinfo.append(TelemetryEventParam("RoleInstanceName", vminfo.roleInstanceName))
        self.sysinfo.append(TelemetryEventParam("ContainerId", vminfo.containerId))
    except ProtocolError as e:
        logger.warn("Failed to get system info: {0}", e)
def update_tracked(ext_handlers):
    """
    Track CGroups for all enabled extensions.
    Track CGroups for services created by enabled extensions.
    Stop tracking CGroups for not-enabled extensions.

    :param List(ExtHandler) ext_handlers:
    """
    if not CGroups.enabled():
        return

    not_enabled_extensions = set()
    for extension in ext_handlers:
        if extension.properties.state == u"enabled":
            CGroupsTelemetry.track_extension(extension.name)
        else:
            not_enabled_extensions.add(extension.name)

    names_now_tracked = set(CGroupsTelemetry._tracked.keys())
    if CGroupsTelemetry.tracked_names != names_now_tracked:
        now_tracking = " ".join("[{0}]".format(name) for name in sorted(names_now_tracked))
        if len(now_tracking):
            logger.info("After updating cgroup telemetry, tracking {0}".format(now_tracking))
        else:
            logger.warn("After updating cgroup telemetry, tracking no cgroups.")
        CGroupsTelemetry.tracked_names = names_now_tracked
def wireserver_route_exists(self):
    """
    Determine whether a route to the known wireserver ip already exists,
    and if so use that as the endpoint. This is true when running in a
    virtual network.

    :return: True if a route to KNOWN_WIRESERVER_IP exists.
    """
    route_exists = False
    logger.info("Test for route to {0}".format(KNOWN_WIRESERVER_IP))
    try:
        route_file = '/proc/net/route'
        if os.path.exists(route_file) and \
                KNOWN_WIRESERVER_IP_ENTRY in open(route_file).read():
            # reset self.gateway and self.routes
            # we do not need to alter the routing table
            self.endpoint = KNOWN_WIRESERVER_IP
            self.gateway = None
            self.routes = None
            route_exists = True
            logger.info("Route to {0} exists".format(KNOWN_WIRESERVER_IP))
        else:
            logger.warn("No route exists to {0}".format(KNOWN_WIRESERVER_IP))
    except Exception as e:
        logger.error(
            "Could not determine whether route exists to {0}: {1}".format(
                KNOWN_WIRESERVER_IP, e))

    return route_exists
def save_event(self, data):
    if self.event_dir is None:
        logger.warn("Event reporter is not initialized.")
        return

    if not os.path.exists(self.event_dir):
        os.mkdir(self.event_dir)
        os.chmod(self.event_dir, 0o700)

    existing_events = os.listdir(self.event_dir)
    if len(existing_events) >= 1000:
        existing_events.sort()
        oldest_files = existing_events[:-999]
        logger.warn("Too many files under: {0}, removing oldest".format(self.event_dir))
        try:
            for f in oldest_files:
                os.remove(os.path.join(self.event_dir, f))
        except IOError as e:
            raise EventError(e)

    filename = os.path.join(self.event_dir, ustr(int(time.time() * 1000000)))
    try:
        with open(filename + ".tmp", 'wb+') as hfile:
            hfile.write(data.encode("utf-8"))
        os.rename(filename + ".tmp", filename + ".tld")
    except IOError as e:
        raise EventError("Failed to write events to file: {0}", e)
def _download(self):
    for uri in self.pkg.uris:
        if not HostPluginProtocol.is_default_channel() and self._fetch(uri.uri):
            break

        elif self.host is not None and self.host.ensure_initialized():
            if not HostPluginProtocol.is_default_channel():
                logger.warn("Download unsuccessful, falling back to host plugin")
            else:
                logger.verbose("Using host plugin as default channel")

            uri, headers = self.host.get_artifact_request(uri.uri, self.host.manifest_uri)
            if self._fetch(uri, headers=headers):
                if not HostPluginProtocol.is_default_channel():
                    logger.verbose("Setting host plugin as default channel")
                    HostPluginProtocol.set_default_channel(True)
                break
            else:
                logger.warn("Host plugin download unsuccessful")

        else:
            logger.error("No download channels available")

    if not os.path.isfile(self.get_agent_pkg_path()):
        msg = u"Unable to download Agent {0} from any URI".format(self.name)
        add_event(
            AGENT_NAME,
            op=WALAEventOperation.Download,
            version=CURRENT_VERSION,
            is_success=False,
            message=msg)
        raise UpdateError(msg)
    return
def collect_and_send_events(self):
    event_list = TelemetryEventList()
    event_dir = os.path.join(conf.get_lib_dir(), "events")
    event_files = os.listdir(event_dir)
    for event_file in event_files:
        if not event_file.endswith(".tld"):
            continue
        event_file_path = os.path.join(event_dir, event_file)
        try:
            data_str = self.collect_event(event_file_path)
        except EventError as e:
            logger.error("{0}", e)
            continue

        try:
            event = parse_event(data_str)
            self.add_sysinfo(event)
            event_list.events.append(event)
        except (ValueError, ProtocolError) as e:
            logger.warn("Failed to decode event file: {0}", e)
            continue

    if len(event_list.events) == 0:
        return

    try:
        protocol = self.protocol_util.get_protocol()
        protocol.report_event(event_list)
    except ProtocolError as e:
        logger.error("{0}", e)
def _ensure_no_orphans(self, orphan_wait_interval=ORPHAN_WAIT_INTERVAL):
    previous_pid_file, pid_file = self._write_pid_file()
    if previous_pid_file is not None:
        try:
            pid = fileutil.read_file(previous_pid_file)
            wait_interval = orphan_wait_interval
            while self.osutil.check_pid_alive(pid):
                wait_interval -= GOAL_STATE_INTERVAL
                if wait_interval <= 0:
                    logger.warn(
                        u"{0} forcibly terminated orphan process {1}",
                        CURRENT_AGENT,
                        pid)
                    os.kill(pid, signal.SIGKILL)
                    break

                logger.info(
                    u"{0} waiting for orphan process {1} to terminate",
                    CURRENT_AGENT,
                    pid)
                time.sleep(GOAL_STATE_INTERVAL)
        except Exception as e:
            logger.warn(
                u"Exception occurred waiting for orphan agent to terminate: {0}",
                ustr(e))
    return
def _load_error(self):
    try:
        self.error = GuestAgentError(self.get_agent_error_file())
        self.error.load()
        logger.verbose(u"Agent {0} error state: {1}", self.name, ustr(self.error))
    except Exception as e:
        logger.warn(u"Agent {0} failed loading error state: {1}", self.name, ustr(e))
def put_vm_status(self, status_blob, sas_url, config_blob_type=None):
    """
    Try to upload the VM status via the host plugin /status channel
    :param sas_url: the blob SAS url to pass to the host plugin
    :param config_blob_type: the blob type from the extension config
    :type status_blob: StatusBlob
    """
    if not self.ensure_initialized():
        raise ProtocolError("HostGAPlugin: HostGAPlugin is not available")

    if status_blob is None or status_blob.vm_status is None:
        raise ProtocolError("HostGAPlugin: Status blob was not provided")

    logger.verbose("HostGAPlugin: Posting VM status")
    try:
        blob_type = status_blob.type if status_blob.type else config_blob_type

        if blob_type == "BlockBlob":
            self._put_block_blob_status(sas_url, status_blob)
        else:
            self._put_page_blob_status(sas_url, status_blob)

        if not HostPluginProtocol.is_default_channel():
            logger.info("HostGAPlugin: Setting host plugin as default channel")
            HostPluginProtocol.set_default_channel(True)
    except Exception as e:
        message = "HostGAPlugin: Exception Put VM status: {0}".format(e)
        logger.error(message)
        from azurelinuxagent.common.event import WALAEventOperation, report_event
        report_event(op=WALAEventOperation.ReportStatus,
                     is_success=False,
                     message=message)
        logger.warn("HostGAPlugin: resetting default channel")
        HostPluginProtocol.set_default_channel(False)
def is_provisioned(self):
    """
    A VM is considered provisioned *anytime* the provisioning sentinel
    file exists and not provisioned *anytime* the file is absent.

    If the VM was provisioned using an agent that did not record the VM
    unique identifier, the provisioning file will be re-written to
    include the identifier. A warning is logged *if* the VM unique
    identifier has changed since the VM was provisioned.
    """
    if not os.path.isfile(self.provisioned_file_path()):
        return False

    s = fileutil.read_file(self.provisioned_file_path()).strip()
    if not self.osutil.is_current_instance_id(s):
        if len(s) > 0:
            logger.warn("VM is provisioned, "
                        "but the VM unique identifier has changed -- "
                        "clearing cached state")
            from azurelinuxagent.pa.deprovision \
                import get_deprovision_handler
            deprovision_handler = get_deprovision_handler()
            deprovision_handler.run_changed_unique_id()

        self.write_provisioned()
        self.report_ready()

    return True
def add_to_extension_cgroup(name, pid=int(os.getpid())):
    """
    Create cgroup directories for this extension in each of the hierarchies and add this process to the new cgroup.
    Should only be called when creating sub-processes and invoked inside the fork/exec window. As a result,
    there's no point in returning the CGroups object itself; the goal is to move the child process into the
    cgroup before the new code even starts running.

    :param str name: Short name of extension, suitable for naming directories in the filesystem
    :param int pid: Process id of extension to be added to the cgroup
    """
    if not CGroups.enabled():
        return
    if name == AGENT_NAME:
        logger.warn('Extension cgroup name cannot match agent cgroup name ({0})'.format(AGENT_NAME))
        return

    try:
        logger.info("Move process {0} into cgroups for extension {1}".format(pid, name))
        CGroups.for_extension(name).add(pid)
    except Exception as ex:
        logger.warn("Unable to move process {0} into cgroups for extension {1}: {2}".format(pid, name, ex))
def _operation(self):
    if self._send_telemetry_events_handler.stopped():
        logger.warn("{0} service is not running, skipping current iteration".format(
            self._send_telemetry_events_handler.get_thread_name()))
        return

    delete_all_event_files = True
    extension_handler_with_event_dirs = []

    try:
        extension_handler_with_event_dirs = self._get_extension_events_dir_with_handler_name(conf.get_ext_log_dir())
        if not extension_handler_with_event_dirs:
            logger.verbose("No Extension events directory exist")
            return

        for extension_handler_with_event_dir in extension_handler_with_event_dirs:
            handler_name = extension_handler_with_event_dir[0]
            handler_event_dir_path = extension_handler_with_event_dir[1]
            self._capture_extension_events(handler_name, handler_event_dir_path)
    except ServiceStoppedError:
        # Since the service stopped, we should not delete the extension files and retry sending them whenever
        # the telemetry service comes back up
        delete_all_event_files = False
    except Exception as error:
        msg = "Unknown error occurred when trying to collect extension events:{0}".format(
            textutil.format_exception(error))
        add_event(op=WALAEventOperation.ExtensionTelemetryEventProcessing, message=msg, is_success=False)
    finally:
        # Always ensure that the event directories are deleted on each run (except when the telemetry service
        # is stopped), even if we run into an error and don't process them this run.
        if delete_all_event_files:
            self._ensure_all_events_directories_empty(extension_handler_with_event_dirs)
def http_request(method, url, data, headers=None, max_retry=3, chk_proxy=False):
    """
    Send an http request to the server.
    On error, sleep and retry up to max_retry times.
    """
    logger.verbose("HTTP Req: {0} {1}", method, url)
    logger.verbose("    Data={0}", data)
    logger.verbose("    Header={0}", headers)
    host, port, secure, rel_uri = _parse_url(url)

    # Check proxy
    proxy_host, proxy_port = (None, None)
    if chk_proxy:
        proxy_host, proxy_port = get_http_proxy()

    # If the httplib module is not built with ssl support, fall back to http
    if secure and not hasattr(httpclient, "HTTPSConnection"):
        logger.warn("httplib is not built with ssl support")
        secure = False

    # If the httplib module doesn't support https tunnelling, fall back to http
    if secure and proxy_host is not None and proxy_port is not None \
            and not hasattr(httpclient.HTTPSConnection, "set_tunnel"):
        logger.warn("httplib does not support https tunnelling "
                    "(new in python 2.7)")
        secure = False

    for retry in range(0, max_retry):
        try:
            resp = _http_request(method,
                                 host,
                                 rel_uri,
                                 port=port,
                                 data=data,
                                 secure=secure,
                                 headers=headers,
                                 proxy_host=proxy_host,
                                 proxy_port=proxy_port)
            logger.verbose("HTTP Resp: Status={0}", resp.status)
            logger.verbose("    Header={0}", resp.getheaders())
            return resp
        except httpclient.HTTPException as e:
            logger.warn('HTTPException {0}, args:{1}', e, repr(e.args))
        except IOError as e:
            logger.warn('Socket IOError {0}, args:{1}', e, repr(e.args))

        if retry < max_retry - 1:
            logger.info("Retry={0}, {1} {2}", retry, method, url)
            time.sleep(RETRY_WAITING_INTERVAL)

    if url is not None and len(url) > 100:
        url_log = url[0:100]  # In case the url is too long
    else:
        url_log = url
    raise HttpError("HTTP Err: {0} {1}".format(method, url_log))
def mount_dvd(self, max_retry=6, chk_err=True, dvd_device=None, mount_point=None, sleep_time=5):
    if dvd_device is None:
        dvd_device = self.get_dvd_device()
    if mount_point is None:
        mount_point = conf.get_dvd_mount_point()
    mount_list = shellutil.run_get_output("mount")[1]
    existing = self.get_mount_point(mount_list, dvd_device)

    if existing is not None:
        # already mounted
        logger.info("{0} is already mounted at {1}", dvd_device, existing)
        return

    if not os.path.isdir(mount_point):
        os.makedirs(mount_point)

    err = ''
    for retry in range(1, max_retry):
        return_code, err = self.mount(dvd_device,
                                      mount_point,
                                      option="-o ro -t udf,iso9660",
                                      chk_err=False)
        if return_code == 0:
            logger.info("Successfully mounted dvd")
            return
        else:
            logger.warn(
                "Mounting dvd failed [retry {0}/{1}, sleeping {2} sec]",
                retry,
                max_retry - 1,
                sleep_time)
            if retry < max_retry:
                time.sleep(sleep_time)

    if chk_err:
        raise OSUtilError("Failed to mount dvd device", inner=err)
def get_primary_interface(self):
    """
    Get the name of the primary interface, which is the one with the
    default route attached to it; if there are multiple default routes,
    the primary has the lowest Metric.
    :return: the interface which has the default route
    """
    # from linux/route.h
    RTF_GATEWAY = 0x02
    DEFAULT_DEST = "00000000"

    hdr_iface = "Iface"
    hdr_dest = "Destination"
    hdr_flags = "Flags"
    hdr_metric = "Metric"

    idx_iface = -1
    idx_dest = -1
    idx_flags = -1
    idx_metric = -1
    primary = None
    primary_metric = None

    if not self.disable_route_warning:
        logger.info("Examine /proc/net/route for primary interface")
    with open('/proc/net/route') as routing_table:
        idx = 0
        for header in filter(lambda h: len(h) > 0, routing_table.readline().strip(" \n").split("\t")):
            if header == hdr_iface:
                idx_iface = idx
            elif header == hdr_dest:
                idx_dest = idx
            elif header == hdr_flags:
                idx_flags = idx
            elif header == hdr_metric:
                idx_metric = idx
            idx = idx + 1

        for entry in routing_table.readlines():
            route = entry.strip(" \n").split("\t")
            if route[idx_dest] == DEFAULT_DEST and int(route[idx_flags]) & RTF_GATEWAY == RTF_GATEWAY:
                metric = int(route[idx_metric])
                iface = route[idx_iface]
                if primary is None or metric < primary_metric:
                    primary = iface
                    primary_metric = metric

    if primary is None:
        primary = ''
        if not self.disable_route_warning:
            with open('/proc/net/route') as routing_table_fh:
                routing_table_text = routing_table_fh.read()
                logger.warn('Could not determine primary interface, '
                            'please ensure /proc/net/route is correct')
                logger.warn('Contents of /proc/net/route:\n{0}'.format(routing_table_text))
                logger.warn('Primary interface examination will retry silently')
                self.disable_route_warning = True
    else:
        logger.info('Primary interface is [{0}]'.format(primary))
        self.disable_route_warning = False
    return primary
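# Illustrative sketch (not part of the agent source): how a default-route row in /proc/net/route
# is interpreted by get_primary_interface() above. The sample header and row below are
# hypothetical values, shown only to make the Destination/Flags/Metric tests concrete.
RTF_GATEWAY = 0x02
sample_header = "Iface\tDestination\tGateway\tFlags\tRefCnt\tUse\tMetric\tMask\tMTU\tWindow\tIRTT"
sample_row = "eth0\t00000000\t0114A8C0\t0003\t0\t0\t100\t00000000\t0\t0\t0"
fields = dict(zip(sample_header.split("\t"), sample_row.split("\t")))
is_default_route = fields["Destination"] == "00000000"                 # all-zero destination
is_via_gateway = int(fields["Flags"]) & RTF_GATEWAY == RTF_GATEWAY     # same test as above
metric = int(fields["Metric"])  # lowest metric wins when several default routes exist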
def provision_network_direct_rdma(self):
    RDMADeviceHandler.update_dat_conf(dapl_config_paths, self.ipv4_addr)

    if not conf.enable_check_rdma_driver():
        logger.info("RDMA: skip checking RDMA driver version")
        RDMADeviceHandler.update_network_interface(self.mac_addr, self.ipv4_addr)
        return

    skip_rdma_device = False
    module_name = "hv_network_direct"
    retcode, out = shellutil.run_get_output("modprobe -R %s" % module_name, chk_err=False)
    if retcode == 0:
        module_name = out.strip()
    else:
        logger.info("RDMA: failed to resolve module name. Use original name")

    retcode, out = shellutil.run_get_output("modprobe %s" % module_name)
    if retcode != 0:
        logger.error("RDMA: failed to load module %s" % module_name)
        return

    retcode, out = shellutil.run_get_output("modinfo %s" % module_name)
    if retcode == 0:
        version = re.search("version:\s+(\d+)\.(\d+)\.(\d+)\D", out, re.IGNORECASE)  # pylint: disable=W1401
        if version:
            v1 = int(version.groups(0)[0])
            v2 = int(version.groups(0)[1])
            if v1 > 4 or v1 == 4 and v2 > 0:
                logger.info("Skip setting /dev/hvnd_rdma on 4.1 or later")
                skip_rdma_device = True
        else:
            logger.info("RDMA: hv_network_direct driver version not present, assuming 4.0.x or older.")
    else:
        logger.warn("RDMA: failed to get module info on hv_network_direct.")

    if not skip_rdma_device:
        RDMADeviceHandler.wait_rdma_device(
            self.rdma_dev, self.device_check_timeout_sec, self.device_check_interval_sec)
        RDMADeviceHandler.write_rdma_config_to_device(
            self.rdma_dev, self.ipv4_addr, self.mac_addr)

    RDMADeviceHandler.update_network_interface(self.mac_addr, self.ipv4_addr)
def migrate_handler_state():
    """
    Migrate handler state and status (if they exist) from an agent-owned directory into the handler-owned config
    directory

    Notes:
     - The v2.0.x branch wrote all handler-related state into the handler-owned config directory (e.g.,
       /var/lib/waagent/Microsoft.Azure.Extensions.LinuxAsm-2.0.1/config).
     - The v2.1.x branch originally moved that state into an agent-owned handler state directory (e.g.,
       /var/lib/waagent/handler_state).
     - This move can cause v2.1.x agents to invoke a handler's install command multiple times. It also makes
       clean-up more difficult since the agent must remove the state as well as the handler directory.
    """
    handler_state_path = os.path.join(conf.get_lib_dir(), "handler_state")
    if not os.path.isdir(handler_state_path):
        return

    for handler_path in glob.iglob(os.path.join(handler_state_path, "*")):
        handler = os.path.basename(handler_path)
        handler_config_path = os.path.join(conf.get_lib_dir(), handler, "config")
        if os.path.isdir(handler_config_path):
            for file in ("State", "Status"):
                from_path = os.path.join(handler_state_path, handler, file.lower())
                to_path = os.path.join(handler_config_path, "Handler" + file)
                if os.path.isfile(from_path) and not os.path.isfile(to_path):
                    try:
                        shutil.move(from_path, to_path)
                    except Exception as e:
                        logger.warn(
                            "Exception occurred migrating {0} {1} file: {2}",
                            handler,
                            file,
                            str(e))

    try:
        shutil.rmtree(handler_state_path)
    except Exception as e:
        logger.warn("Exception occurred removing {0}: {1}", handler_state_path, str(e))
    return
def _download(self):
    for uri in self.pkg.uris:
        if not HostPluginProtocol.is_default_channel() and self._fetch(uri.uri):
            break

        elif self.host is not None and self.host.ensure_initialized():
            if not HostPluginProtocol.is_default_channel():
                logger.warn("Download failed, switching to host plugin")
            else:
                logger.verbose("Using host plugin as default channel")

            uri, headers = self.host.get_artifact_request(uri.uri, self.host.manifest_uri)
            try:
                if self._fetch(uri, headers=headers, use_proxy=False):
                    if not HostPluginProtocol.is_default_channel():
                        logger.verbose("Setting host plugin as default channel")
                        HostPluginProtocol.set_default_channel(True)
                    break
                else:
                    logger.warn("Host plugin download failed")

            # If the HostPlugin rejects the request,
            # let the error continue, but set to use the HostPlugin
            except ResourceGoneError:
                HostPluginProtocol.set_default_channel(True)
                raise

        else:
            logger.error("No download channels available")

    if not os.path.isfile(self.get_agent_pkg_path()):
        msg = u"Unable to download Agent {0} from any URI".format(self.name)
        add_event(
            AGENT_NAME,
            op=WALAEventOperation.Download,
            version=CURRENT_VERSION,
            is_success=False,
            message=msg)
        raise UpdateError(msg)
def test_telemetry_logger_add_log_event(self, mock_lib_dir, *_):
    mock_lib_dir.return_value = self.lib_dir
    __event_logger__.event_dir = self.event_dir
    prefix = "YoloLogger"

    logger.add_logger_appender(logger.AppenderType.TELEMETRY, logger.LogLevel.WARNING, path=add_log_event)
    logger.set_prefix(prefix)

    logger.warn('Test Log - Warning')

    event_files = os.listdir(__event_logger__.event_dir)
    self.assertEqual(1, len(event_files))

    log_file_event = os.path.join(__event_logger__.event_dir, event_files[0])
    try:
        with open(log_file_event) as logfile:
            logcontent = logfile.read()
            # Checking the contents of the event file.
            self.assertIn("Test Log - Warning", logcontent)
    except Exception as e:
        self.assertFalse(True, "The log file looks like it isn't correctly setup for this test. Take a look. "
                               "{0}".format(e))
def test_telemetry_logger_check_all_file_logs_written_when_events_gt_MAX_NUMBER_OF_EVENTS(self, mock_lib_dir, *_):
    mock_lib_dir.return_value = self.lib_dir
    __event_logger__.event_dir = self.event_dir
    no_of_log_statements = MAX_NUMBER_OF_EVENTS + 100
    exception_caught = False
    prefix = "YoloLogger"

    logger.add_logger_appender(logger.AppenderType.FILE, logger.LogLevel.INFO, path=self.log_file)
    logger.add_logger_appender(logger.AppenderType.TELEMETRY, logger.LogLevel.WARNING, path=add_log_event)
    logger.set_prefix(prefix)

    # Calling logger.warn no_of_log_statements times would cause the telemetry appender to write
    # 1000 events into the events dir and then drop the remaining events. It should not generate a RuntimeError.
    try:
        for i in range(0, no_of_log_statements):
            logger.warn('Test Log - {0} - 1 - Warning'.format(i))
    except RuntimeError:
        exception_caught = True

    self.assertFalse(exception_caught, msg="Caught a Runtime Error. This should not have been raised.")
    self.assertEqual(MAX_NUMBER_OF_EVENTS, len(os.listdir(__event_logger__.event_dir)))

    try:
        with open(self.log_file) as logfile:
            logcontent = logfile.readlines()
            # Checking the last log entry.
            # Subtracting 1 as range is exclusive of the upper bound
            self.assertIn("WARNING {1} Test Log - {0} - 1 - Warning".format(no_of_log_statements - 1, prefix),
                          logcontent[-1])
            # Checking the 1001st log entry. We know that the 1001st entry would generate a PERIODIC message of
            # too many events, which should be captured in the log file as well.
            self.assertRegex(logcontent[1001],
                             r"(.*WARNING\s*{0}\s*\[PERIODIC\]\s*Too many files under:.*{1}, "
                             r"current count\:\s*\d+,\s*removing oldest\s*.*)".format(prefix, self.event_dir))
    except Exception as e:
        self.assertFalse(True, "The log file looks like it isn't correctly setup for this test. "
                               "Take a look. {0}".format(e))
def send_cgroup_telemetry(self):
    if self.last_cgroup_telemetry is None:
        self.last_cgroup_telemetry = datetime.datetime.utcnow()

    if datetime.datetime.utcnow() >= (self.last_cgroup_telemetry + MonitorHandler.CGROUP_TELEMETRY_PERIOD):
        try:
            for cgroup_name, metrics in CGroupsTelemetry.collect_all_tracked().items():
                for metric_group, metric_name, value in metrics:
                    if value > 0:
                        report_metric(metric_group, metric_name, cgroup_name, value)
        except Exception as e:
            logger.warn("Monitor: failed to collect cgroups performance metrics: {0}", ustr(e))
            logger.verbose(traceback.format_exc())

        # Look for extension cgroups we're not already tracking and track them
        try:
            CGroupsTelemetry.update_tracked(self.protocol.client.get_current_handlers())
        except Exception as e:
            logger.warn("Monitor: failed to update cgroups tracked extensions: {0}", ustr(e))
            logger.verbose(traceback.format_exc())

        self.last_cgroup_telemetry = datetime.datetime.utcnow()
def get_first_if(self):
    """Return the interface name, and ip addr of the management interface.

    We need to add a struct_size check here because, curiously, our
    64bit platform is identified by python in Azure(Stack) as 32 bit and
    without adjusting the struct_size, we can't get the information we need.

    I believe this may be caused by only python i686 being shipped with
    BIG-IP instead of python x86_64??
    """
    iface = ''
    expected = 16  # how many devices should I expect...
    python_arc = platform.architecture()[0]
    if python_arc == '64bit':
        struct_size = 40  # for 64bit the size is 40 bytes
    else:
        struct_size = 32  # for 32bit the size is 32 bytes
    sock = socket.socket(socket.AF_INET,
                         socket.SOCK_DGRAM,
                         socket.IPPROTO_UDP)
    buff = array.array('B', b'\0' * (expected * struct_size))
    param = struct.pack('iL',
                        expected * struct_size,
                        buff.buffer_info()[0])
    ret = fcntl.ioctl(sock.fileno(), 0x8912, param)
    retsize = (struct.unpack('iL', ret)[0])
    if retsize == (expected * struct_size):
        logger.warn(('SIOCGIFCONF returned more than {0} up '
                     'network interfaces.'), expected)
    sock = array_to_bytes(buff)
    for i in range(0, struct_size * expected, struct_size):
        iface = self._format_single_interface_name(sock, i)

        # Azure public was returning "lo:1" when deploying WAF
        if b'lo' in iface:
            continue
        else:
            break

    return iface.decode('latin-1'), socket.inet_ntoa(sock[i + 20:i + 24])  # pylint: disable=undefined-loop-variable
def _invoke_cgroup_operation(self, operation, error_message, on_error=None):
    """
    Ensures the given operation is invoked only if cgroups are enabled and traps any errors on the operation.
    """
    if not self.enabled():
        return None

    try:
        return operation()
    except Exception as exception:
        logger.warn("{0} Error: {1}".format(error_message, ustr(exception)))
        if on_error is not None:
            try:
                on_error(exception)
            except Exception as exception:
                logger.warn("CGroupConfigurator._invoke_cgroup_operation: {0}".format(ustr(exception)))
def _emit_changes_in_default_configuration():
    try:
        def log_event(msg):
            logger.info(msg)
            add_event(AGENT_NAME, op=WALAEventOperation.ConfigurationChange, message=msg)

        def log_if_int_changed_from_default(name, current):
            default = conf.get_int_default_value(name)
            if default != current:
                log_event("{0} changed from its default: {1}. New value: {2}".format(name, default, current))

        def log_if_op_disabled(name, value):
            if not value:
                log_event("{0} is set to False, not processing the operation".format(name))

        log_if_int_changed_from_default("Extensions.GoalStatePeriod", conf.get_goal_state_period())
        log_if_op_disabled("OS.EnableFirewall", conf.enable_firewall())
        log_if_op_disabled("Extensions.Enabled", conf.get_extensions_enabled())

        if conf.enable_firewall():
            log_if_int_changed_from_default("OS.EnableFirewallPeriod", conf.get_enable_firewall_period())

        if conf.get_lib_dir() != "/var/lib/waagent":
            log_event("lib dir is in an unexpected location: {0}".format(conf.get_lib_dir()))

    except Exception as e:
        logger.warn("Failed to log changes in configuration: {0}", ustr(e))
def __setup_binary_file(self):
    binary_file_path = os.path.join(conf.get_lib_dir(), self.BINARY_FILE_NAME)
    try:
        fileutil.write_file(
            binary_file_path,
            self.__BINARY_CONTENTS.format(
                egg_path=self._current_agent_executable_path,
                wire_ip=self._dst_ip,
                user_id=self._uid,
                wait=self._wait,
                py_path=sys.executable))
        logger.info("Successfully updated the Binary file {0} for firewall setup".format(binary_file_path))
    except Exception:
        logger.warn(
            "Unable to setup binary file, removing the service unit file {0} to ensure it's not run on system "
            "reboot".format(self.get_service_file_path()))
        self.__remove_file_without_raising(binary_file_path)
        self.__remove_file_without_raising(self.get_service_file_path())
        raise
def save_event(self, data):
    if self.event_dir is None:
        logger.warn("Cannot save event -- Event reporter is not initialized.")
        return

    try:
        fileutil.mkdir(self.event_dir, mode=0o700)
    except (IOError, OSError) as e:
        msg = "Failed to create events folder {0}. Error: {1}".format(self.event_dir, ustr(e))
        raise EventError(msg)

    try:
        existing_events = os.listdir(self.event_dir)
        if len(existing_events) >= MAX_NUMBER_OF_EVENTS:
            logger.periodic_warn(logger.EVERY_MINUTE,
                                 "[PERIODIC] Too many files under: {0}, current count: {1}, "
                                 "removing oldest event files".format(self.event_dir, len(existing_events)))
            existing_events.sort()
            oldest_files = existing_events[:-999]
            for event_file in oldest_files:
                os.remove(os.path.join(self.event_dir, event_file))
    except (IOError, OSError) as e:
        msg = "Failed to remove old events from events folder {0}. Error: {1}".format(self.event_dir, ustr(e))
        raise EventError(msg)

    filename = os.path.join(self.event_dir, ustr(int(time.time() * 1000000)))
    try:
        with open(filename + ".tmp", 'wb+') as hfile:
            hfile.write(data.encode("utf-8"))
        os.rename(filename + ".tmp", filename + AGENT_EVENT_FILE_EXTENSION)
    except (IOError, OSError) as e:
        msg = "Failed to write events to file: {0}".format(e)
        raise EventError(msg)
def add_event(name, op=WALAEventOperation.Unknown, is_success=True, duration=0, version=str(CURRENT_VERSION),
              message="", log_event=True, reporter=__event_logger__):
    if reporter.event_dir is None:
        logger.warn("Cannot add event -- Event reporter is not initialized.")
        _log_event(name, op, message, duration, is_success=is_success)
        return

    if should_emit_event(name, version, op, is_success):
        mark_event_status(name, version, op, is_success)
        reporter.add_event(name,
                           op=op,
                           is_success=is_success,
                           duration=duration,
                           version=str(version),
                           message=message,
                           log_event=log_event)
def _send_logs(self):
    msg = None
    success = False

    try:
        with open(COMPRESSED_ARCHIVE_PATH, "rb") as fh:
            archive_content = fh.read()
            self.protocol.upload_logs(archive_content)
            msg = "Successfully uploaded logs."
            logger.info(msg)

        success = True
    except Exception as e:
        msg = "Failed to upload logs. Error: {0}".format(ustr(e))
        logger.warn(msg)
    finally:
        add_event(name=AGENT_NAME,
                  version=CURRENT_VERSION,
                  op=WALAEventOperation.LogCollection,
                  is_success=success,
                  message=msg,
                  log_event=False)
def image_origin(self):
    """
    An integer value describing the origin of the image.

      0 -> unknown
      1 -> custom - user created image
      2 -> endorsed - See https://docs.microsoft.com/en-us/azure/virtual-machines/linux/endorsed-distros
      3 -> platform - non-endorsed image that is available in the Azure Marketplace.
    """
    try:
        if self.publisher == "":
            return IMDS_IMAGE_ORIGIN_CUSTOM

        if ComputeInfo.__matcher.is_match(self.publisher, self.offer, self.sku, self.version):
            return IMDS_IMAGE_ORIGIN_ENDORSED
        else:
            return IMDS_IMAGE_ORIGIN_PLATFORM

    except Exception as e:
        logger.warn("Could not determine the image origin from IMDS: {0}", str(e))
        return IMDS_IMAGE_ORIGIN_UNKNOWN
def fetch_full_goal_state(self, wire_client):
    try:
        logger.info('Fetching goal state [incarnation {0}]', self.incarnation)

        xml_text = wire_client.fetch_config(self._hosting_env_uri, wire_client.get_header())
        self.hosting_env = HostingEnv(xml_text)

        xml_text = wire_client.fetch_config(self._shared_conf_uri, wire_client.get_header())
        self.shared_conf = SharedConfig(xml_text)

        if self._certs_uri is not None:
            xml_text = wire_client.fetch_config(self._certs_uri, wire_client.get_header_for_cert())
            self.certs = Certificates(xml_text)

        if self._remote_access_uri is not None:
            xml_text = wire_client.fetch_config(self._remote_access_uri, wire_client.get_header_for_cert())
            self.remote_access = RemoteAccess(xml_text)
    except Exception as exception:
        logger.warn("Fetching the goal state failed: {0}", ustr(exception))
        raise ProtocolError(msg="Error fetching goal state", inner=exception)
    finally:
        logger.info('Fetch goal state completed')
def set_properties(name, obj, data):
    if isinstance(obj, DataContract):
        validate_param("Property '{0}'".format(name), data, dict)
        for prob_name, prob_val in data.items():
            prob_full_name = "{0}.{1}".format(name, prob_name)
            try:
                prob = getattr(obj, prob_name)
            except AttributeError:
                logger.warn("Unknown property: {0}", prob_full_name)
                continue
            prob = set_properties(prob_full_name, prob, prob_val)
            setattr(obj, prob_name, prob)
        return obj
    elif isinstance(obj, DataContractList):
        validate_param("List '{0}'".format(name), data, list)
        for item_data in data:
            item = obj.item_cls()
            item = set_properties(name, item, item_data)
            obj.append(item)
        return obj
    else:
        return data
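# Illustrative sketch (not part of the agent source): how set_properties() above recursively maps
# a parsed JSON dict onto DataContract objects. The ExampleStatus/ExampleReport classes and the
# DataContractList(ExampleStatus) constructor call are assumptions made for this example; only the
# fact that a DataContractList exposes item_cls is implied by the function above.
class ExampleStatus(DataContract):
    def __init__(self):
        self.name = None
        self.code = 0

class ExampleReport(DataContract):
    def __init__(self):
        self.version = None
        self.statuses = DataContractList(ExampleStatus)

# data as it might arrive from json.loads()
example_data = {"version": "1.0", "statuses": [{"name": "ok", "code": 0}], "unknownField": 1}
example_report = set_properties("report", ExampleReport(), example_data)
# example_report.statuses[0].name == "ok"; "unknownField" is logged as an unknown property and skipped.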
def read_response_error(resp):
    result = ''
    if resp is not None:
        try:
            result = "[HTTP Failed] [{0}: {1}] {2}".format(resp.status, resp.reason, resp.read())

            # this result string is passed upstream to several methods
            # which do a raise HttpError() or a format() of some kind;
            # as a result it cannot have any unicode characters
            if PY_VERSION_MAJOR < 3:
                result = ustr(result, encoding='ascii', errors='ignore')
            else:
                result = result \
                    .encode(encoding='ascii', errors='ignore') \
                    .decode(encoding='ascii', errors='ignore')

            result = textutil.replace_non_ascii(result)
        except Exception:
            logger.warn(traceback.format_exc())
    return result
def run(self):
    self.ext_handlers, etag = None, None
    try:
        self.protocol = self.protocol_util.get_protocol()
        self.ext_handlers, etag = self.protocol.get_ext_handlers()
    except Exception as e:
        msg = u"Exception retrieving extension handlers: {0}".format(ustr(e))
        logger.warn(msg)
        add_event(AGENT_NAME,
                  version=CURRENT_VERSION,
                  op=WALAEventOperation.ExtensionProcessing,
                  is_success=False,
                  message=msg)
        return

    try:
        msg = u"Handle extensions updates for incarnation {0}".format(etag)
        logger.verbose(msg)
        # Log status report success on new config
        self.log_report = True

        self.handle_ext_handlers(etag)
        self.last_etag = etag

        self.report_ext_handlers_status()
        self.cleanup_outdated_handlers()
    except RestartError:
        raise
    except Exception as e:
        msg = u"Exception processing extension handlers: {0}".format(ustr(e))
        logger.warn(msg)
        add_event(AGENT_NAME,
                  version=CURRENT_VERSION,
                  op=WALAEventOperation.ExtensionProcessing,
                  is_success=False,
                  message=msg)
        return
def initialize_vminfo_common_parameters(self, protocol):
    """
    Initializes the common parameters that come from the goal state and IMDS
    """
    # create an index of the event parameters for faster updates
    parameters = {}
    for p in self._common_parameters:
        parameters[p.name] = p

    try:
        vminfo = protocol.get_vminfo()
        parameters[CommonTelemetryEventSchema.TenantName].value = vminfo.tenantName
        parameters[CommonTelemetryEventSchema.RoleName].value = vminfo.roleName
        parameters[CommonTelemetryEventSchema.RoleInstanceName].value = vminfo.roleInstanceName
    except Exception as e:
        logger.warn("Failed to get VM info from goal state; will be missing from telemetry: {0}", ustr(e))

    try:
        imds_client = get_imds_client(protocol.get_endpoint())
        imds_info = imds_client.get_compute()
        parameters[CommonTelemetryEventSchema.Location].value = imds_info.location
        parameters[CommonTelemetryEventSchema.SubscriptionId].value = imds_info.subscriptionId
        parameters[CommonTelemetryEventSchema.ResourceGroupName].value = imds_info.resourceGroupName
        parameters[CommonTelemetryEventSchema.VMId].value = imds_info.vmId
        parameters[CommonTelemetryEventSchema.ImageOrigin].value = int(imds_info.image_origin)
    except Exception as e:
        logger.warn("Failed to get IMDS info; will be missing from telemetry: {0}", ustr(e))
def get_first_if(self):
    """
    Return the interface name, and ip addr of the
    first active non-loopback interface.
    """
    iface = ''
    expected = 16  # how many devices should I expect...

    # for 64bit the size is 40 bytes
    # for 32bit the size is 32 bytes
    python_arc = platform.architecture()[0]
    struct_size = 32 if python_arc == '32bit' else 40

    sock = socket.socket(socket.AF_INET,
                         socket.SOCK_DGRAM,
                         socket.IPPROTO_UDP)
    buff = array.array('B', b'\0' * (expected * struct_size))
    param = struct.pack('iL',
                        expected * struct_size,
                        buff.buffer_info()[0])
    ret = fcntl.ioctl(sock.fileno(), 0x8912, param)
    retsize = (struct.unpack('iL', ret)[0])

    if retsize == (expected * struct_size):
        logger.warn(('SIOCGIFCONF returned more than {0} up '
                     'network interfaces.'), expected)

    sock = buff.tostring()
    primary = bytearray(self.get_primary_interface(), encoding='utf-8')
    for i in range(0, struct_size * expected, struct_size):
        iface = sock[i:i + 16].split(b'\0', 1)[0]
        if len(iface) == 0 or self.is_loopback(iface) or iface != primary:
            # test the next one
            if len(iface) != 0 and not self.disable_route_warning:
                logger.info('Interface [{0}] skipped'.format(iface))
            continue
        else:
            # use this one
            logger.info('Interface [{0}] selected'.format(iface))
            break

    return iface.decode('latin-1'), socket.inet_ntoa(sock[i + 20:i + 24])
def start_extension_command(self, extension_name, command, shell, cwd, env, stdout, stderr):
    """
    Starts a command (install/enable/etc) for an extension and adds the command's PID to the extension's cgroup
    :param extension_name: The extension executing the command
    :param command: The command to invoke
    :param cwd: The working directory for the command
    :param env: The environment to pass to the command's process
    :param stdout: File object to redirect stdout to
    :param stderr: File object to redirect stderr to
    """
    if not self.enabled():
        process = subprocess.Popen(command,
                                   shell=shell,
                                   cwd=cwd,
                                   env=env,
                                   stdout=stdout,
                                   stderr=stderr,
                                   preexec_fn=os.setsid)
    else:
        process, extension_cgroups = self._cgroups_api.start_extension_command(extension_name,
                                                                               command,
                                                                               shell=shell,
                                                                               cwd=cwd,
                                                                               env=env,
                                                                               stdout=stdout,
                                                                               stderr=stderr)
        try:
            for cgroup in extension_cgroups:
                CGroupsTelemetry.track_cgroup(cgroup)
        except Exception as e:
            logger.warn("Cannot add cgroup '{0}' to tracking list; resource usage will not be tracked. "
                        "Error: {1}".format(cgroup.path, ustr(e)))

    return process
def collect_and_send_events(self):
    if self.last_event_collection is None:
        self.last_event_collection = datetime.datetime.utcnow() - MonitorHandler.EVENT_COLLECTION_PERIOD

    if datetime.datetime.utcnow() >= (self.last_event_collection + MonitorHandler.EVENT_COLLECTION_PERIOD):
        try:
            event_list = TelemetryEventList()
            event_dir = os.path.join(conf.get_lib_dir(), "events")
            event_files = os.listdir(event_dir)
            for event_file in event_files:
                if not event_file.endswith(".tld"):
                    continue
                event_file_path = os.path.join(event_dir, event_file)
                try:
                    data_str = self.collect_event(event_file_path)
                except EventError as e:
                    logger.error("{0}", e)
                    continue

                try:
                    event = parse_event(data_str)
                    self.add_sysinfo(event)
                    event_list.events.append(event)
                except (ValueError, ProtocolError) as e:
                    logger.warn("Failed to decode event file: {0}", e)
                    continue

            if len(event_list.events) == 0:
                return

            try:
                self.protocol.report_event(event_list)
            except ProtocolError as e:
                logger.error("{0}", e)
        except Exception as e:
            logger.warn("Failed to send events: {0}", e)

        self.last_event_collection = datetime.datetime.utcnow()
def _write_pid_file(self):
    pid_files = self._get_pid_files()

    pid_dir, pid_name, pid_re = self._get_pid_parts()

    previous_pid_file = None if len(pid_files) <= 0 else pid_files[-1]
    pid_index = -1 \
        if previous_pid_file is None \
        else int(pid_re.match(os.path.basename(previous_pid_file)).group(1))
    pid_file = os.path.join(pid_dir, "{0}_{1}".format(pid_index + 1, pid_name))

    try:
        fileutil.write_file(pid_file, ustr(os.getpid()))
        logger.info(u"{0} running as process {1}", CURRENT_AGENT, ustr(os.getpid()))
    except Exception as e:
        logger.warn(
            u"Exception writing goal state agent {0} pid to {1}: {2}",
            CURRENT_AGENT,
            pid_file,
            ustr(e))
        pid_file = None

    return pid_files, pid_file
def add_event(name, op="", is_success=True, duration=0, version=CURRENT_VERSION, message="", evt_type="", is_internal=False, reporter=__event_logger__): log = logger.info if is_success else logger.error log("Event: name={0}, op={1}, message={2}", name, op, message) if reporter.event_dir is None: logger.warn("Event reporter is not initialized.") return reporter.add_event(name, op=op, is_success=is_success, duration=duration, version=str(version), message=message, evt_type=evt_type, is_internal=is_internal)
def run(self, child_args=None):
    logger.info("{0} Version:{1}", AGENT_LONG_NAME, AGENT_VERSION)
    logger.info("OS: {0} {1}", DISTRO_NAME, DISTRO_VERSION)
    logger.info("Python: {0}.{1}.{2}", PY_VERSION_MAJOR, PY_VERSION_MINOR, PY_VERSION_MICRO)

    self.check_pid()

    # If FIPS is enabled, set the OpenSSL environment variable
    # Note:
    # -- Subprocesses inherit the current environment
    if conf.get_fips_enabled():
        os.environ[OPENSSL_FIPS_ENVIRONMENT] = '1'

    while self.running:
        try:
            self.daemon(child_args)
        except Exception as e:
            err_msg = traceback.format_exc()
            add_event(name=AGENT_NAME,
                      is_success=False,
                      message=ustr(err_msg),
                      op=WALAEventOperation.UnhandledError)
            logger.warn("Daemon ended with exception -- Sleep 15 seconds and restart daemon")
            time.sleep(15)