def add_user(self, username, encrypted_password, account_expiration):
    """Create a remote-access user account with an expiration date.

    Two phases: (1) create the account; (2) decrypt and set the password,
    then grant sudo.  A failure in phase 1 is logged and aborts; a failure
    in phase 2 is handed to handle_failed_create (presumably to remove the
    half-configured account — confirm against its implementation).
    """
    try:
        # Expire one day *after* the requested date so the account remains
        # valid through the whole of account_expiration.
        expiration_date = (account_expiration + timedelta(days=1)).strftime(DATE_FORMAT)
        logger.verbose("Adding user {0} with expiration date {1}"
                       .format(username, expiration_date))
        self.os_util.useradd(username, expiration_date, REMOTE_ACCESS_ACCOUNT_COMMENT)
    except OSError as oe:
        logger.error("Error adding user {0}. {1}"
                     .format(username, oe.strerror))
        return
    except Exception as e:
        logger.error("Error adding user {0}. {1}".format(username, ustr(e)))
        return
    try:
        # Decrypt the password with the transport private key before setting it.
        prv_key = os.path.join(conf.get_lib_dir(), TRANSPORT_PRIVATE_CERT)
        pwd = self.cryptUtil.decrypt_secret(encrypted_password, prv_key)
        self.os_util.chpasswd(username, pwd, conf.get_password_cryptid(),
                              conf.get_password_crypt_salt_len())
        self.os_util.conf_sudoer(username)
        logger.info("User '{0}' added successfully with expiration in {1}"
                    .format(username, expiration_date))
        return
    except OSError as oe:
        # Account exists but configuration failed; delegate cleanup.
        self.handle_failed_create(username, oe.strerror)
    except Exception as e:
        self.handle_failed_create(username, ustr(e))
def run(self):
    """Default provisioning flow.

    Note the control flow: when provisioning is *disabled* the body is
    skipped but execution still falls through to write the 'provisioned'
    sentinel file and report Ready (with no thumbprint).
    """
    # if provisioning is already done, return
    provisioned = os.path.join(conf.get_lib_dir(), "provisioned")
    if os.path.isfile(provisioned):
        logger.info("Provisioning already completed, skipping.")
        return
    thumbprint = None
    # If provision is not enabled, report ready and then return
    if not conf.get_provision_enabled():
        logger.info("Provisioning is disabled, skipping.")
    else:
        logger.info("Running default provisioning handler")
        try:
            # cloud-init must NOT be active when the agent provisions.
            if not self.validate_cloud_init(is_expected=False):
                raise ProvisionError("cloud-init appears to be running, "
                                     "this is not expected, cannot continue")
            logger.info("Copying ovf-env.xml")
            ovf_env = self.protocol_util.copy_ovf_env()
            self.protocol_util.get_protocol_by_file()
            self.report_not_ready("Provisioning", "Starting")
            logger.info("Starting provisioning")
            self.provision(ovf_env)
            thumbprint = self.reg_ssh_host_key()
            self.osutil.restart_ssh_service()
            self.report_event("Provision succeed", is_success=True)
        except (ProtocolError, ProvisionError) as e:
            self.report_not_ready("ProvisioningFailed", ustr(e))
            self.report_event(ustr(e))
            logger.error("Provisioning failed: {0}", ustr(e))
            return
    # write out provisioned file and report Ready
    fileutil.write_file(provisioned, "")
    self.report_ready(thumbprint)
    logger.info("Provisioning complete")
def run_get_output(cmd, chk_err=True, log_cmd=True):
    """
    Wrapper for subprocess.check_output.
    Execute 'cmd'.  Returns return code and STDOUT, trapping expected
    exceptions.
    Reports exceptions to Error if chk_err parameter is True
    """
    if log_cmd:
        logger.verbose(u"Run '{0}'", cmd)
    try:
        raw = subprocess.check_output(cmd,
                                      stderr=subprocess.STDOUT,
                                      shell=True)
    except subprocess.CalledProcessError as failure:
        # Non-zero exit: decode the captured output and optionally log it.
        decoded = ustr(failure.output,
                       encoding='utf-8',
                       errors="backslashreplace")
        if chk_err:
            if log_cmd:
                logger.error(u"Command: '{0}'", failure.cmd)
            logger.error(u"Return code: {0}", failure.returncode)
            logger.error(u"Result: {0}", decoded)
        return failure.returncode, decoded
    return 0, ustr(raw, encoding='utf-8', errors="backslashreplace")
def run(self):
    """Ubuntu provisioning: let cloud-init do the work, then wait for its
    artifacts (ovf-env.xml, SSH host key) and report status to the platform.
    """
    # If provision is enabled, run default provision handler
    if conf.get_provision_enabled():
        super(UbuntuProvisionHandler, self).run()
        return
    logger.info("run Ubuntu provision handler")
    provisioned = os.path.join(conf.get_lib_dir(), "provisioned")
    if os.path.isfile(provisioned):
        # Already provisioned on a previous boot.
        return
    logger.info("Waiting cloud-init to copy ovf-env.xml.")
    self.wait_for_ovfenv()
    # Called for its side effects; the returned protocol object was bound to
    # an unused local in the original code.
    self.protocol_util.get_protocol()
    self.report_not_ready("Provisioning", "Starting")
    logger.info("Sleep 15 seconds to prevent throttling")
    time.sleep(15)  # Sleep to prevent throttling
    try:
        logger.info("Wait for ssh host key to be generated.")
        thumbprint = self.wait_for_ssh_host_key()
        fileutil.write_file(provisioned, "")
        logger.info("Finished provisioning")
    except ProvisionError as e:
        logger.error("Provision failed: {0}", e)
        self.report_not_ready("ProvisioningFailed", ustr(e))
        self.report_event(ustr(e))
        return
    self.report_ready(thumbprint)
    self.report_event("Provision succeed", is_success=True)
def _write_pid_file(self):
    """Write this agent's pid to the next sequentially-numbered pid file.

    Returns (pid_files, pid_file): the pre-existing pid files and the path
    just written, or None for the latter if the write failed.
    """
    pid_files = self._get_pid_files()
    pid_dir, pid_name, pid_re = self._get_pid_parts()
    # The newest existing pid file (if any) determines the next index.
    previous_pid_file = None \
        if len(pid_files) <= 0 \
        else pid_files[-1]
    pid_index = -1 \
        if previous_pid_file is None \
        else int(pid_re.match(os.path.basename(previous_pid_file)).group(1))
    pid_file = os.path.join(pid_dir, "{0}_{1}".format(pid_index + 1, pid_name))
    try:
        fileutil.write_file(pid_file, ustr(os.getpid()))
        logger.info(u"{0} running as process {1}",
                    CURRENT_AGENT,
                    ustr(os.getpid()))
    except Exception as e:
        # BUGFIX: log the failing path *before* clearing pid_file; the
        # original set pid_file = None first, so the warning always printed
        # "None" instead of the path.  (Also fixed the "Expection" typo.)
        logger.warn(
            u"Exception writing goal state agent {0} pid to {1}: {2}",
            CURRENT_AGENT,
            pid_file,
            ustr(e))
        pid_file = None
    return pid_files, pid_file
def run_get_output(cmd, chk_err=True, log_cmd=True, expected_errors=None):
    """
    Wrapper for subprocess.check_output.
    Execute 'cmd'.  Returns return code and STDOUT, trapping expected
    exceptions.
    Reports exceptions to Error if chk_err parameter is True.

    expected_errors: optional collection of return codes that are
    anticipated; those are logged at info level instead of error.
    """
    # BUGFIX: the original used a mutable default argument ([]); use None
    # and normalize here so the list is never shared between calls.
    if expected_errors is None:
        expected_errors = []
    if log_cmd:
        logger.verbose(u"Command: [{0}]", cmd)
    try:
        output = subprocess.check_output(cmd,
                                         stderr=subprocess.STDOUT,
                                         shell=True)
        output = ustr(output, encoding='utf-8', errors="backslashreplace")
    except subprocess.CalledProcessError as e:
        output = ustr(e.output, encoding='utf-8', errors="backslashreplace")
        if chk_err:
            msg = u"Command: [{0}], " \
                  u"return code: [{1}], " \
                  u"result: [{2}]".format(cmd, e.returncode, output)
            if e.returncode in expected_errors:
                # Anticipated failure: not worth an error-level entry.
                logger.info(msg)
            else:
                logger.error(msg)
        return e.returncode, output
    except Exception as e:
        if chk_err:
            logger.error(u"Command [{0}] raised unexpected exception: [{1}]"
                         .format(cmd, ustr(e)))
        return -1, ustr(e)
    return 0, output
def copy_ovf_env(self):
    """
    Copy ovf env file from dvd to hard disk.
    Remove password before save it to the disk
    """
    dvd_mount_point = conf.get_dvd_mount_point()
    ovf_file_path_on_dvd = os.path.join(dvd_mount_point, OVF_FILE_NAME)
    tag_file_path_on_dvd = os.path.join(dvd_mount_point, TAG_FILE_NAME)
    try:
        self.osutil.mount_dvd()
        ovfxml = fileutil.read_file(ovf_file_path_on_dvd, remove_bom=True)
        # Parse first so a malformed document fails before anything is written.
        ovfenv = OvfEnv(ovfxml)
        # Mask the user password before persisting the XML to disk.
        ovfxml = re.sub("<UserPassword>.*?<", "<UserPassword>*<", ovfxml)
        ovf_file_path = os.path.join(conf.get_lib_dir(), OVF_FILE_NAME)
        fileutil.write_file(ovf_file_path, ovfxml)
        if os.path.isfile(tag_file_path_on_dvd):
            logger.info("Found {0} in provisioning ISO", TAG_FILE_NAME)
            tag_file_path = os.path.join(conf.get_lib_dir(), TAG_FILE_NAME)
            shutil.copyfile(tag_file_path_on_dvd, tag_file_path)
    except (OSUtilError, IOError) as e:
        raise ProtocolError(ustr(e))
    try:
        # Unmount/eject is best-effort; failure is logged, not raised.
        self.osutil.umount_dvd()
        self.osutil.eject_dvd()
    except OSUtilError as e:
        logger.warn(ustr(e))
    return ovfenv
def _load_error(self):
    """Load this agent's persisted error state; failures are logged, never raised.

    Note: self.error is assigned *before* load() runs, so even if loading
    fails the attribute holds a (default-initialized) GuestAgentError.
    """
    try:
        self.error = GuestAgentError(self.get_agent_error_file())
        self.error.load()
        logger.verbose(u"Agent {0} error state: {1}", self.name, ustr(self.error))
    except Exception as e:
        logger.warn(u"Agent {0} failed loading error state: {1}", self.name, ustr(e))
def run(self):
    """Ubuntu provisioning handler: wait for cloud-init's artifacts and
    report Ready with the SSH host key thumbprint.
    """
    # If provision is enabled, run default provision handler
    if conf.get_provision_enabled():
        # BUGFIX: the two adjacent string literals concatenated without a
        # separating space, producing "...not typicalin Ubuntu...".
        logger.warn("Provisioning flag is enabled, this is not typical "
                    "in Ubuntu, please ensure your config is correct.")
        super(UbuntuProvisionHandler, self).run()
        return
    provisioned = os.path.join(conf.get_lib_dir(), "provisioned")
    if os.path.isfile(provisioned):
        logger.info("Provisioning already completed, skipping.")
        return
    logger.info("Running Ubuntu provisioning handler")
    self.wait_for_ovfenv()
    self.protocol_util.get_protocol()
    self.report_not_ready("Provisioning", "Starting")
    try:
        thumbprint = self.wait_for_ssh_host_key()
        fileutil.write_file(provisioned, "")
        logger.info("Finished provisioning")
    except ProvisionError as e:
        logger.error("Provisioning failed: {0}", ustr(e))
        self.report_not_ready("ProvisioningFailed", ustr(e))
        self.report_event(ustr(e))
        return
    self.report_ready(thumbprint)
    self.report_event("Provision succeed", is_success=True)
def run(self):
    """Cloud-init provisioning: wait for cloud-init to copy ovf-env.xml and
    generate the SSH host key, then report Ready with the key thumbprint.
    """
    # If provision is enabled, the default agent-driven handler takes over.
    if conf.get_provision_enabled():
        logger.warn("Provisioning flag is enabled, which overrides using "
                    "cloud-init; running the default provisioning code")
        super(CloudInitProvisionHandler, self).run()
        return
    try:
        if super(CloudInitProvisionHandler, self).is_provisioned():
            logger.info("Provisioning already completed, skipping.")
            return
        utc_start = datetime.utcnow()
        logger.info("Running CloudInit provisioning handler")
        self.wait_for_ovfenv()
        self.protocol_util.get_protocol()
        self.report_not_ready("Provisioning", "Starting")
        thumbprint = self.wait_for_ssh_host_key()
        self.write_provisioned()
        logger.info("Finished provisioning")
        self.report_ready(thumbprint)
        # Telemetry includes wall-clock duration and system uptime.
        self.report_event("Provisioning with cloud-init succeeded ({0}s)".format(self._get_uptime_seconds()),
                          is_success=True,
                          duration=elapsed_milliseconds(utc_start))
    except ProvisionError as e:
        msg = "Provisioning with cloud-init failed: {0} ({1}s)".format(ustr(e), self._get_uptime_seconds())
        logger.error(msg)
        self.report_not_ready("ProvisioningFailed", ustr(e))
        self.report_event(msg)
        return
def handle_ext_handler(self, ext_handler, etag):
    """Apply the goal state to one extension handler.

    Skips the handler when it is not an upgrade and the goal state (etag)
    has not changed since the last pass; the skip is logged only once per
    etag via self.log_etag.
    """
    ext_handler_i = ExtHandlerInstance(ext_handler, self.protocol)
    ext_handler_i.decide_version()
    if not ext_handler_i.is_upgrade and self.last_etag == etag:
        if self.log_etag:
            ext_handler_i.logger.verbose("Version {0} is current for etag {1}",
                                         ext_handler_i.pkg.version,
                                         etag)
            self.log_etag = False
        return
    self.log_etag = True
    try:
        # Dispatch on the state requested by the goal state.
        state = ext_handler.properties.state
        ext_handler_i.logger.info("Expected handler state: {0}", state)
        if state == "enabled":
            self.handle_enable(ext_handler_i)
        elif state == u"disabled":
            self.handle_disable(ext_handler_i)
        elif state == u"uninstall":
            self.handle_uninstall(ext_handler_i)
        else:
            message = u"Unknown ext handler state:{0}".format(state)
            raise ExtensionError(message)
    except ExtensionError as e:
        # Record failure in handler status and telemetry; do not propagate.
        ext_handler_i.set_handler_status(message=ustr(e), code=-1)
        ext_handler_i.report_event(message=ustr(e), is_success=False)
def report_ext_handler_status(self, vm_status, ext_handler):
    """Collect one handler's status (extensions + heartbeat) into vm_status.

    Does nothing if the handler has no persisted status; errors while
    collecting are written back into the handler status rather than raised.
    """
    ext_handler_i = ExtHandlerInstance(ext_handler, self.protocol)
    handler_status = ext_handler_i.get_handler_status()
    if handler_status is None:
        return
    # Propagate an in-progress upgrade GUID, if one is tracked for this handler.
    guid = self.get_upgrade_guid(ext_handler.name)
    if guid is not None:
        handler_status.upgradeGuid = guid
    handler_state = ext_handler_i.get_handler_state()
    if handler_state != ExtHandlerState.NotInstalled:
        try:
            active_exts = ext_handler_i.report_ext_status()
            handler_status.extensions.extend(active_exts)
        except ExtensionError as e:
            ext_handler_i.set_handler_status(message=ustr(e), code=-1)
        try:
            # A heartbeat, when present, overrides the reported status value.
            heartbeat = ext_handler_i.collect_heartbeat()
            if heartbeat is not None:
                handler_status.status = heartbeat.get('status')
        except ExtensionError as e:
            ext_handler_i.set_handler_status(message=ustr(e), code=-1)
    vm_status.vmAgent.extensionHandlers.append(handler_status)
def run(self):
    """Default provisioning flow (newest variant).

    When provisioning is disabled the sentinel is still written and Ready is
    still reported (with no thumbprint) so the platform sees the VM as done.
    """
    if not conf.get_provision_enabled():
        logger.info("Provisioning is disabled, skipping.")
        self.write_provisioned()
        self.report_ready()
        return
    try:
        utc_start = datetime.utcnow()
        thumbprint = None
        if self.is_provisioned():
            logger.info("Provisioning already completed, skipping.")
            return
        logger.info("Running default provisioning handler")
        # cloud-init must NOT be active when the agent provisions.
        if not self.validate_cloud_init(is_expected=False):
            raise ProvisionError("cloud-init appears to be running, "
                                 "this is not expected, cannot continue")
        logger.info("Copying ovf-env.xml")
        ovf_env = self.protocol_util.copy_ovf_env()
        self.protocol_util.get_protocol(by_file=True)
        self.report_not_ready("Provisioning", "Starting")
        logger.info("Starting provisioning")
        self.provision(ovf_env)
        thumbprint = self.reg_ssh_host_key()
        self.osutil.restart_ssh_service()
        self.write_provisioned()
        # Two telemetry events: overall success, and the ProvisionGuestAgent
        # flag carried in the OVF document.
        self.report_event("Provisioning succeeded ({0}s)".format(self._get_uptime_seconds()),
                          is_success=True,
                          duration=elapsed_milliseconds(utc_start))
        self.report_event(message=ovf_env.provision_guest_agent,
                          is_success=True,
                          duration=0,
                          operation=WALAEventOperation.ProvisionGuestAgent)
        self.report_ready(thumbprint)
        logger.info("Provisioning complete")
    except (ProtocolError, ProvisionError) as e:
        self.report_not_ready("ProvisioningFailed", ustr(e))
        self.report_event(ustr(e))
        logger.error("Provisioning failed: {0}", ustr(e))
        return
def copy_ovf_env(self):
    """
    Copy ovf env file from dvd to hard disk.
    Remove password before save it to the disk
    """
    dvd_mount_point = conf.get_dvd_mount_point()
    ovf_file_path_on_dvd = os.path.join(dvd_mount_point, OVF_FILE_NAME)
    tag_file_path_on_dvd = os.path.join(dvd_mount_point, TAG_FILE_NAME)
    ovf_file_path = os.path.join(conf.get_lib_dir(), OVF_FILE_NAME)
    tag_file_path = os.path.join(conf.get_lib_dir(), TAG_FILE_NAME)
    try:
        self.osutil.mount_dvd()
    except OSUtilError as e:
        raise ProtocolError("[CopyOvfEnv] Error mounting dvd: "
                            "{0}".format(ustr(e)))
    try:
        # Parse first so a malformed document fails before anything is written.
        ovfxml = fileutil.read_file(ovf_file_path_on_dvd, remove_bom=True)
        ovfenv = OvfEnv(ovfxml)
    except IOError as e:
        raise ProtocolError("[CopyOvfEnv] Error reading file "
                            "{0}: {1}".format(ovf_file_path_on_dvd,
                                              ustr(e)))
    try:
        # Mask the user password before persisting the XML to disk.
        ovfxml = re.sub(PASSWORD_PATTERN,
                        PASSWORD_REPLACEMENT,
                        ovfxml)
        fileutil.write_file(ovf_file_path, ovfxml)
    except IOError as e:
        raise ProtocolError("[CopyOvfEnv] Error writing file "
                            "{0}: {1}".format(ovf_file_path,
                                              ustr(e)))
    try:
        if os.path.isfile(tag_file_path_on_dvd):
            logger.info("Found {0} in provisioning ISO", TAG_FILE_NAME)
            shutil.copyfile(tag_file_path_on_dvd, tag_file_path)
    except IOError as e:
        # BUGFIX: the original formatted tag_file_path for both placeholders;
        # report the real source (on the DVD) and destination paths.
        raise ProtocolError("[CopyOvfEnv] Error copying file "
                            "{0} to {1}: {2}".format(tag_file_path_on_dvd,
                                                     tag_file_path,
                                                     ustr(e)))
    try:
        # Unmount/eject is best-effort; failure is logged, not raised.
        self.osutil.umount_dvd()
        self.osutil.eject_dvd()
    except OSUtilError as e:
        logger.warn(ustr(e))
    return ovfenv
def _write_pid_file(self):
    """Write this agent's pid to the pid file chosen by _get_pid_files().

    Returns (previous_pid_file, pid_file); pid_file is None if the write
    failed.
    """
    previous_pid_file, pid_file = self._get_pid_files()
    try:
        fileutil.write_file(pid_file, ustr(os.getpid()))
        logger.info(u"{0} running as process {1}",
                    CURRENT_AGENT,
                    ustr(os.getpid()))
    except Exception as e:
        # BUGFIX: log the failing path *before* clearing pid_file; the
        # original set pid_file = None first, so the warning always printed
        # "None" instead of the path.  (Also fixed the "Expection" typo.)
        logger.warn(
            u"Exception writing goal state agent {0} pid to {1}: {2}",
            CURRENT_AGENT,
            pid_file,
            ustr(e))
        pid_file = None
    return previous_pid_file, pid_file
def run(self):
    """Fetch the extension goal state and process every extension handler.

    Transport failures are rate-limited through get_artifact_error_state
    before being escalated to telemetry; processing failures are reported
    as ExtensionProcessing events.
    """
    self.ext_handlers, etag = None, None
    try:
        self.protocol = self.protocol_util.get_protocol()
        self.ext_handlers, etag = self.protocol.get_ext_handlers()
        self.get_artifact_error_state.reset()
    except Exception as e:
        msg = u"Exception retrieving extension handlers: {0}".format(ustr(e))
        detailed_msg = '{0} {1}'.format(msg, traceback.format_exc())
        self.get_artifact_error_state.incr()
        if self.get_artifact_error_state.is_triggered():
            # BUGFIX: the format string used "{0)" (mismatched brace), which
            # made .format() raise ValueError on this error path.
            add_event(AGENT_NAME,
                      version=CURRENT_VERSION,
                      op=WALAEventOperation.GetArtifactExtended,
                      is_success=False,
                      message="Failed to get extension artifact for over "
                              "{0}: {1}".format(self.get_artifact_error_state.min_timedelta, msg))
            self.get_artifact_error_state.reset()
        else:
            logger.warn(msg)
        add_event(AGENT_NAME,
                  version=CURRENT_VERSION,
                  op=WALAEventOperation.ExtensionProcessing,
                  is_success=False,
                  message=detailed_msg)
        return
    try:
        msg = u"Handle extensions updates for incarnation {0}".format(etag)
        logger.verbose(msg)
        # Log status report success on new config
        self.log_report = True
        self.handle_ext_handlers(etag)
        self.last_etag = etag
        self.report_ext_handlers_status()
        self.cleanup_outdated_handlers()
    except Exception as e:
        msg = u"Exception processing extension handlers: {0}".format(
            ustr(e))
        logger.warn(msg)
        add_event(AGENT_NAME,
                  version=CURRENT_VERSION,
                  op=WALAEventOperation.ExtensionProcessing,
                  is_success=False,
                  message=msg)
        return
def _get_data(self, url, headers=None):
    """GET url and return (parsed JSON, ETag header).

    Raises ProtocolError on transport errors or any non-200 status.
    NOTE(review): when the body is None this returns a bare None instead of
    a (None, etag) tuple — callers that unpack two values would fail here;
    confirm this asymmetric return is intentional.
    """
    try:
        resp = restutil.http_get(url, headers=headers)
    except HttpError as e:
        raise ProtocolError(ustr(e))
    if resp.status != httpclient.OK:
        raise ProtocolError("{0} - GET: {1}".format(resp.status, url))
    data = resp.read()
    etag = resp.getheader('ETag')
    if data is None:
        return None
    data = json.loads(ustr(data, encoding="utf-8"))
    return data, etag
def run(self):
    """Default provisioning flow (intermediate variant).

    Unlike the disabled-path of some sibling variants, when provisioning is
    disabled this version returns immediately without writing the sentinel
    or reporting Ready.
    """
    # If provision is not enabled, report ready and then return
    if not conf.get_provision_enabled():
        logger.info("Provisioning is disabled, skipping.")
        return
    try:
        utc_start = datetime.utcnow()
        thumbprint = None
        # if provisioning is already done, return
        if self.is_provisioned():
            logger.info("Provisioning already completed, skipping.")
            return
        logger.info("Running default provisioning handler")
        # cloud-init must NOT be active when the agent provisions.
        if not self.validate_cloud_init(is_expected=False):
            raise ProvisionError("cloud-init appears to be running, "
                                 "this is not expected, cannot continue")
        logger.info("Copying ovf-env.xml")
        ovf_env = self.protocol_util.copy_ovf_env()
        self.protocol_util.get_protocol_by_file()
        self.report_not_ready("Provisioning", "Starting")
        logger.info("Starting provisioning")
        self.provision(ovf_env)
        thumbprint = self.reg_ssh_host_key()
        self.osutil.restart_ssh_service()
        # write out provisioned file and report Ready
        self.write_provisioned()
        self.report_event("Provision succeed",
                          is_success=True,
                          duration=elapsed_milliseconds(utc_start))
        self.report_ready(thumbprint)
        logger.info("Provisioning complete")
    except (ProtocolError, ProvisionError) as e:
        self.report_not_ready("ProvisioningFailed", ustr(e))
        self.report_event(ustr(e))
        logger.error("Provisioning failed: {0}", ustr(e))
        return
def run(self, child_args=None):
    """Daemon entry point: log environment details, initialize, then run
    self.daemon() forever, restarting it 15s after any unhandled exception.

    child_args: arguments forwarded to the daemon loop (passed through
    unchanged).
    """
    logger.info("{0} Version:{1}", AGENT_LONG_NAME, AGENT_VERSION)
    logger.info("OS: {0} {1}", DISTRO_NAME, DISTRO_VERSION)
    logger.info("Python: {0}.{1}.{2}", PY_VERSION_MAJOR, PY_VERSION_MINOR, PY_VERSION_MICRO)
    self.check_pid()
    self.initialize_environment()
    CGroups.setup()
    # If FIPS is enabled, set the OpenSSL environment variable
    # Note:
    # -- Subprocesses inherit the current environment
    if conf.get_fips_enabled():
        os.environ[OPENSSL_FIPS_ENVIRONMENT] = '1'
    while self.running:
        try:
            self.daemon(child_args)
        except Exception as e:
            # Record the full traceback as telemetry, then restart the loop.
            err_msg = traceback.format_exc()
            add_event(name=AGENT_NAME,
                      is_success=False,
                      message=ustr(err_msg),
                      op=WALAEventOperation.UnhandledError)
            logger.warn("Daemon ended with exception -- Sleep 15 seconds and restart daemon")
            time.sleep(15)
def test_get_dvd_device_failure(self):
    # When no /dev entry matches a DVD device pattern, get_dvd_device should
    # raise OSUtilError, and the message should mention the entries it saw.
    with patch.object(os, 'listdir', return_value=['cpu', 'notmatching']):
        try:
            osutil.DefaultOSUtil().get_dvd_device()
            self.fail('OSUtilError was not raised')
        except OSUtilError as ose:
            self.assertTrue('notmatching' in ustr(ose))
def report_ext_handlers_status(self):
    """Go thru handler_state dir, collect and report status"""
    # Agent-level status is always "Ready" here; per-handler failures are
    # captured as telemetry events rather than failing the whole report.
    vm_status = VMStatus()
    vm_status.vmAgent.version = str(CURRENT_VERSION)
    vm_status.vmAgent.status = "Ready"
    vm_status.vmAgent.message = "Guest Agent is running"
    if self.ext_handlers is not None:
        for ext_handler in self.ext_handlers.extHandlers:
            try:
                self.report_ext_handler_status(vm_status, ext_handler)
            except ExtensionError as e:
                add_event(
                    AGENT_NAME,
                    version=CURRENT_VERSION,
                    is_success=False,
                    message=ustr(e))
    logger.verbose("Report vm agent status")
    try:
        self.protocol.report_vm_status(vm_status)
        if self.log_report:
            logger.verbose("Successfully reported vm agent status")
    except ProtocolError as e:
        message = "Failed to report vm agent status: {0}".format(e)
        add_event(AGENT_NAME,
                  version=CURRENT_VERSION,
                  is_success=False,
                  message=message)
def asn1_to_ssh(self, pubkey):
    """Convert a PEM-armored ASN.1/DER RSA public key to an OpenSSH
    'ssh-rsa <base64>' line.

    Raises CryptError if the pyasn1 package cannot be imported.
    """
    # Strip the PEM armor ("-----BEGIN/END ...") and join the base64 body.
    lines = pubkey.split("\n")
    lines = [x for x in lines if not x.startswith("----")]
    base64_encoded = "".join(lines)
    try:
        # TODO remove pyasn1 dependency
        from pyasn1.codec.der import decoder as der_decoder
        der_encoded = base64.b64decode(base64_encoded)
        # Outer decode: [0][1] is the BIT STRING carrying the RSA key;
        # the inner decode yields (modulus, exponent) — presumably an
        # RSAPublicKey sequence, confirm against the key source.
        der_encoded = der_decoder.decode(der_encoded)[0][1]
        key = der_decoder.decode(self.bits_to_bytes(der_encoded))[0]
        n = key[0]
        e = key[1]
        # Build the SSH wire format: length-prefixed "ssh-rsa", then the
        # exponent and modulus.  The extra leading zero byte (len+1 and
        # b"\0") keeps the modulus from being read as negative.
        keydata = bytearray()
        keydata.extend(struct.pack(">I", len("ssh-rsa")))
        keydata.extend(b"ssh-rsa")
        keydata.extend(struct.pack(">I", len(self.num_to_bytes(e))))
        keydata.extend(self.num_to_bytes(e))
        keydata.extend(struct.pack(">I", len(self.num_to_bytes(n)) + 1))
        keydata.extend(b"\0")
        keydata.extend(self.num_to_bytes(n))
        keydata_base64 = base64.b64encode(bytebuffer(keydata))
        return ustr(b"ssh-rsa " + keydata_base64 + b"\n", encoding="utf-8")
    except ImportError as e:
        raise CryptError("Failed to load pyasn1.codec.der")
def test_remove_bom(self):
    """remove_bom strips a leading UTF-8 BOM and leaves other text alone."""
    # A UTF-8 BOM at the start of the text is stripped.
    data = ustr(b"\xef\xbb\xbfhehe", encoding="utf-8")
    data = textutil.remove_bom(data)
    # BUGFIX: was assertNotEquals(0xBB, data[0]) — an int compared to a
    # str can never be equal, so the assertion could not fail.
    self.assertEquals(u"hehe", data)

    # bom is comprised of a sequence of three bytes and if length of the
    # input is shorter than three bytes, remove_bom should not do anything
    data = u"\xa7"
    data = textutil.remove_bom(data)
    # BUGFIX: was assertEquals(data, data[0]) — a tautology for any
    # one-character string; compare against the expected literal instead.
    self.assertEquals(u"\xa7", data)

    data = u"\xa7\xef"
    data = textutil.remove_bom(data)
    self.assertEquals(u"\xa7", data[0])
    self.assertEquals(u"\xef", data[1])

    # Test string without BOM is not affected
    data = u"hehe"
    data = textutil.remove_bom(data)
    self.assertEquals(u"h", data[0])

    data = u""
    data = textutil.remove_bom(data)
    self.assertEquals(u"", data)

    data = u" "
    data = textutil.remove_bom(data)
    self.assertEquals(u" ", data)
def _ensure_no_orphans(self, orphan_wait_interval=ORPHAN_WAIT_INTERVAL):
    """Wait for a previously-running agent (recorded in the prior pid file)
    to exit, force-killing it with SIGKILL if it is still alive after
    orphan_wait_interval seconds.  All failures are logged, never raised.
    """
    previous_pid_file, pid_file = self._write_pid_file()
    if previous_pid_file is not None:
        try:
            pid = fileutil.read_file(previous_pid_file)
            wait_interval = orphan_wait_interval
            while self.osutil.check_pid_alive(pid):
                wait_interval -= GOAL_STATE_INTERVAL
                if wait_interval <= 0:
                    logger.warn(
                        u"{0} forcibly terminated orphan process {1}",
                        CURRENT_AGENT,
                        pid)
                    # BUGFIX: os.kill requires an integer pid; the value
                    # read from the pid file is a string and previously
                    # raised TypeError here.
                    os.kill(int(pid), signal.SIGKILL)
                    break
                logger.info(
                    u"{0} waiting for orphan process {1} to terminate",
                    CURRENT_AGENT,
                    pid)
                time.sleep(GOAL_STATE_INTERVAL)
        except Exception as e:
            logger.warn(
                u"Exception occurred waiting for orphan agent to terminate: {0}",
                ustr(e))
    return
def _unpack(self):
    """Extract the downloaded agent package zip into the agent directory.

    Any pre-existing agent directory is removed first.  Raises UpdateError
    if extraction fails or does not produce the expected directory.
    """
    try:
        # Start from a clean directory so stale files never survive.
        if os.path.isdir(self.get_agent_dir()):
            shutil.rmtree(self.get_agent_dir())
        zipfile.ZipFile(self.get_agent_pkg_path()).extractall(self.get_agent_dir())
    except Exception as e:
        msg = u"Exception unpacking Agent {0} from {1}: {2}".format(
            self.name,
            self.get_agent_pkg_path(),
            ustr(e))
        raise UpdateError(msg)
    # Sanity check: extraction must have created the directory.
    if not os.path.isdir(self.get_agent_dir()):
        msg = u"Unpacking Agent {0} failed to create directory {1}".format(
            self.name,
            self.get_agent_dir())
        raise UpdateError(msg)
    logger.verbose(
        u"Agent {0} unpacked successfully to {1}",
        self.name,
        self.get_agent_dir())
    return
def wait_for_ovfenv(self, max_retry=1800, sleep_time=1):
    """
    Wait for cloud-init to copy ovf-env.xml file from provision ISO
    """
    ovf_file_path = os.path.join(conf.get_lib_dir(), OVF_FILE_NAME)
    for retry in range(0, max_retry):
        if os.path.isfile(ovf_file_path):
            try:
                ovf_env = OvfEnv(fileutil.read_file(ovf_file_path))
                self.handle_provision_guest_agent(ovf_env.provision_guest_agent)
                return
            except ProtocolError as pe:
                raise ProvisionError("OVF xml could not be parsed "
                                     "[{0}]: {1}".format(ovf_file_path,
                                                         ustr(pe)))
        else:
            if retry < max_retry - 1:
                # NOTE(review): "max_retry - retry" counts the current
                # attempt, so the "retries remaining" shown is one high.
                logger.info(
                    "Waiting for cloud-init to copy ovf-env.xml to {0} "
                    "[{1} retries remaining, "
                    "sleeping {2}s]".format(ovf_file_path,
                                            max_retry - retry,
                                            sleep_time))
                if not self.validate_cloud_init():
                    logger.warn("cloud-init does not appear to be running")
                time.sleep(sleep_time)
    raise ProvisionError("Giving up, ovf-env.xml was not copied to {0} "
                         "after {1}s".format(ovf_file_path,
                                             max_retry * sleep_time))
def _purge_agents(self):
    """
    Remove from disk all directories and .zip files of unknown agents
    (without removing the current, running agent).
    """
    path = os.path.join(conf.get_lib_dir(), "{0}-*".format(AGENT_NAME))
    known_versions = [agent.version for agent in self.agents]
    # Never purge the running agent, even if the manifest omits it.
    if not is_current_agent_installed() and CURRENT_VERSION not in known_versions:
        logger.warn(
            u"Running Agent {0} was not found in the agent manifest - adding to list",
            CURRENT_VERSION)
        known_versions.append(CURRENT_VERSION)
    for agent_path in glob.iglob(path):
        try:
            # The glob matches both dirs and .zip files; trim the extension
            # so the version regex applies to either form.
            name = fileutil.trim_ext(agent_path, "zip")
            m = AGENT_DIR_PATTERN.match(name)
            if m is not None and FlexibleVersion(m.group(1)) not in known_versions:
                if os.path.isfile(agent_path):
                    logger.info(u"Purging outdated Agent file {0}", agent_path)
                    os.remove(agent_path)
                else:
                    logger.info(u"Purging outdated Agent directory {0}", agent_path)
                    shutil.rmtree(agent_path)
        except Exception as e:
            # Purging is best-effort; keep going on individual failures.
            logger.warn(u"Purging {0} raised exception: {1}", agent_path, ustr(e))
    return
def get_distro():
    """Return distro info as a list [name, version, codename/id, full_name],
    normalized to a lower-cased, unquoted name.
    """
    if "FreeBSD" in platform.system():
        # BUGFIX: use a raw string — "\-" and "\Z" are invalid escape
        # sequences in a plain literal (SyntaxWarning on newer Python).
        release = re.sub(r"\-.*\Z", "", ustr(platform.release()))
        osinfo = ["freebsd", release, "", "freebsd"]
    elif "linux_distribution" in dir(platform):
        osinfo = list(
            platform.linux_distribution(
                full_distribution_name=0,
                supported_dists=platform._supported_dists + ("alpine",)
            )
        )
        full_name = platform.linux_distribution()[0].strip()
        osinfo.append(full_name)
    else:
        # NOTE(review): platform.dist() (and linux_distribution) were
        # removed in Python 3.8 — this fallback fails there; confirm the
        # supported interpreter range.
        osinfo = platform.dist()

    # The platform.py lib has issue with detecting oracle linux distribution.
    # Merge the following patch provided by oracle as a temporary fix.
    if os.path.exists("/etc/oracle-release"):
        osinfo[2] = "oracle"
        osinfo[3] = "Oracle Linux"

    # The platform.py lib has issue with detecting BIG-IP linux distribution.
    # Merge the following patch provided by F5.
    if os.path.exists("/shared/vadc"):
        osinfo = get_f5_platform()

    # Remove trailing whitespace and quote in distro name
    osinfo[0] = osinfo[0].strip('"').strip(" ").lower()
    return osinfo
def save_event(self, data):
    """Persist one telemetry event (UTF-8 text) into the event directory.

    Keeps at most 1000 files, deleting the oldest beyond that.  The event
    is written to a '.tmp' file and then renamed to '.tld' so readers never
    observe a partially written event.  Raises EventError on write failure.
    """
    if self.event_dir is None:
        logger.warn("Event reporter is not initialized.")
        return
    if not os.path.exists(self.event_dir):
        os.mkdir(self.event_dir)
        os.chmod(self.event_dir, 0o700)
    existing_events = os.listdir(self.event_dir)
    if len(existing_events) >= 1000:
        # Filenames are microsecond timestamps, so a lexical sort is
        # oldest-first; keep the newest 999 (+1 new event = 1000).
        existing_events.sort()
        oldest_files = existing_events[:-999]
        logger.warn("Too many files under: {0}, removing oldest".format(self.event_dir))
        try:
            for f in oldest_files:
                os.remove(os.path.join(self.event_dir, f))
        except IOError as e:
            raise EventError(e)
    filename = os.path.join(self.event_dir,
                            ustr(int(time.time() * 1000000)))
    try:
        with open(filename + ".tmp", 'wb+') as hfile:
            hfile.write(data.encode("utf-8"))
        os.rename(filename + ".tmp", filename + ".tld")
    except IOError as e:
        raise EventError("Failed to write events to file:{0}", e)
def send_imds_heartbeat(self):
    """
    Send a health signal every IMDS_HEARTBEAT_PERIOD. The signal is 'Healthy' when we have successfully called and validated
    a response in the last IMDS_HEALTH_PERIOD.
    """
    if self.last_imds_heartbeat is None:
        # First run: backdate so a heartbeat is sent immediately.
        self.last_imds_heartbeat = datetime.datetime.utcnow() - MonitorHandler.IMDS_HEARTBEAT_PERIOD

    if datetime.datetime.utcnow() >= (self.last_imds_heartbeat + MonitorHandler.IMDS_HEARTBEAT_PERIOD):
        try:
            is_currently_healthy, response = self.imds_client.validate()

            if is_currently_healthy:
                self.imds_errorstate.reset()
            else:
                self.imds_errorstate.incr()

            # Only report unhealthy once the error state has been
            # triggered (debounces transient failures).
            is_healthy = self.imds_errorstate.is_triggered() is False
            logger.verbose("IMDS health: {0} [{1}]", is_healthy, response)

            self.health_service.report_imds_status(is_healthy, response)

        except Exception as e:
            msg = "Exception sending imds heartbeat: {0}".format(ustr(e))
            add_event(
                name=AGENT_NAME,
                version=CURRENT_VERSION,
                op=WALAEventOperation.ImdsHeartbeat,
                is_success=False,
                message=msg,
                log_event=False)

        # Timestamp is advanced even on failure, so errors do not cause a
        # tight retry loop.
        self.last_imds_heartbeat = datetime.datetime.utcnow()
def _emit_changes_in_default_configuration():
    """Log (and emit telemetry for) configuration values that differ from
    their defaults; never raises — failures are reduced to a warning.
    """
    try:
        def log_event(msg):
            # Emit both to the local log and as a ConfigurationChange event.
            logger.info(msg)
            add_event(AGENT_NAME, op=WALAEventOperation.ConfigurationChange, message=msg)

        def log_if_int_changed_from_default(name, current):
            # Report an integer setting only when it differs from default.
            default = conf.get_int_default_value(name)
            if default != current:
                log_event(
                    "{0} changed from its default: {1}. New value: {2}".
                    format(name, default, current))

        def log_if_op_disabled(name, value):
            # Report a boolean setting only when it disables an operation.
            if not value:
                log_event(
                    "{0} is set to False, not processing the operation".
                    format(name))

        log_if_int_changed_from_default("Extensions.GoalStatePeriod",
                                        conf.get_goal_state_period())
        log_if_op_disabled("OS.EnableFirewall", conf.enable_firewall())
        log_if_op_disabled("Extensions.Enabled", conf.get_extensions_enabled())

        if conf.enable_firewall():
            log_if_int_changed_from_default(
                "OS.EnableFirewallPeriod",
                conf.get_enable_firewall_period())

        if conf.get_lib_dir() != "/var/lib/waagent":
            log_event("lib dir is in an unexpected location: {0}".format(
                conf.get_lib_dir()))

    except Exception as e:
        logger.warn("Failed to log changes in configuration: {0}", ustr(e))
def set_handler_status(self, status="NotReady", message="", code=0):
    """Persist this handler's status to the HandlerStatus file as JSON.

    Serialization or write failures are logged (and a partial file cleaned
    up), never raised.
    """
    state_dir = self.get_conf_dir()

    handler_status = ExtHandlerStatus()
    handler_status.name = self.ext_handler.name
    handler_status.version = str(self.ext_handler.properties.version)
    handler_status.message = message
    handler_status.code = code
    handler_status.status = status
    status_file = os.path.join(state_dir, "HandlerStatus")

    try:
        handler_status_json = json.dumps(get_properties(handler_status))
        if handler_status_json is not None:
            fileutil.write_file(status_file, handler_status_json)
        else:
            self.logger.error("Failed to create JSON document of handler status for {0} version {1}".format(
                self.ext_handler.name,
                self.ext_handler.properties.version))
    except (IOError, ValueError, ProtocolError) as e:
        # Remove any partially-written status file before logging.
        fileutil.clean_ioerror(e, paths=[status_file])
        self.logger.error("Failed to save handler status: {0}, {1}", ustr(e), traceback.format_exc())
def poll_all_tracked():
    """Collect metrics from every tracked cgroup; stop tracking any cgroup
    that is no longer active.  Returns the list of collected metrics.
    """
    metrics = []
    with CGroupsTelemetry._rlock:
        # Iterate over a copy so stop_tracking can mutate the original list.
        for cgroup in CGroupsTelemetry._tracked[:]:
            try:
                metrics.extend(cgroup.get_tracked_metrics())
            except Exception as e:  # pylint: disable=C0103
                # There can be scenarios when the CGroup has been deleted by the time we are fetching the values
                # from it. This would raise IOError with file entry not found (ERRNO: 2). We do not want to log
                # every occurrences of such case as it would be very verbose. We do want to log all the other
                # exceptions which could occur, which is why we do a periodic log for all the other errors.
                if not isinstance(e, (IOError, OSError)) or e.errno != errno.ENOENT:  # pylint: disable=E1101
                    logger.periodic_warn(logger.EVERY_HOUR,
                                         '[PERIODIC] Could not collect metrics for cgroup '
                                         '{0}. Error : {1}'.format(cgroup.name, ustr(e)))
            if not cgroup.is_active():
                CGroupsTelemetry.stop_tracking(cgroup)
    return metrics
def test_read_response_bytes(self):
    """read_response_error must render a raw byte payload (including UTF-8
    BOM bytes and non-ASCII sequences) into a clean ASCII summary string."""
    # Colon-separated hex bytes of a storage-error JSON body; includes the
    # UTF-8 BOM (c3:af:c2:bb:c2:bf) embedded mid-payload.
    response_bytes = '7b:0a:20:20:20:20:22:65:72:72:6f:72:43:6f:64:65:22:' \
                     '3a:20:22:54:68:65:20:62:6c:6f:62:20:74:79:70:65:20:' \
                     '69:73:20:69:6e:76:61:6c:69:64:20:66:6f:72:20:74:68:' \
                     '69:73:20:6f:70:65:72:61:74:69:6f:6e:2e:22:2c:0a:20:' \
                     '20:20:20:22:6d:65:73:73:61:67:65:22:3a:20:22:c3:af:' \
                     'c2:bb:c2:bf:3c:3f:78:6d:6c:20:76:65:72:73:69:6f:6e:' \
                     '3d:22:31:2e:30:22:20:65:6e:63:6f:64:69:6e:67:3d:22:' \
                     '75:74:66:2d:38:22:3f:3e:3c:45:72:72:6f:72:3e:3c:43:' \
                     '6f:64:65:3e:49:6e:76:61:6c:69:64:42:6c:6f:62:54:79:' \
                     '70:65:3c:2f:43:6f:64:65:3e:3c:4d:65:73:73:61:67:65:' \
                     '3e:54:68:65:20:62:6c:6f:62:20:74:79:70:65:20:69:73:' \
                     '20:69:6e:76:61:6c:69:64:20:66:6f:72:20:74:68:69:73:' \
                     '20:6f:70:65:72:61:74:69:6f:6e:2e:0a:52:65:71:75:65:' \
                     '73:74:49:64:3a:63:37:34:32:39:30:63:62:2d:30:30:30:' \
                     '31:2d:30:30:62:35:2d:30:36:64:61:2d:64:64:36:36:36:' \
                     '61:30:30:30:22:2c:0a:20:20:20:20:22:64:65:74:61:69:' \
                     '6c:73:22:3a:20:22:22:0a:7d'.split(':')
    # The non-ASCII BOM bytes are expected to be stripped from the output.
    expected_response = '[HTTP Failed] [status: reason] {\n    "errorCode": "The blob ' \
                        'type is invalid for this operation.",\n    ' \
                        '"message": "<?xml version="1.0" ' \
                        'encoding="utf-8"?>' \
                        '<Error><Code>InvalidBlobType</Code><Message>The ' \
                        'blob type is invalid for this operation.\n' \
                        'RequestId:c74290cb-0001-00b5-06da-dd666a000",' \
                        '\n    "details": ""\n}'

    response_string = ''.join(chr(int(b, 16)) for b in response_bytes)
    response = MagicMock()
    response.status = 'status'
    response.reason = 'reason'
    with patch.object(response, 'read') as patch_response:
        patch_response.return_value = response_string
        result = restutil.read_response_error(response)
        self.assertEqual(result, expected_response)
        # The result must be safe to embed in an exception message.
        try:
            raise HttpError("{0}".format(result))
        except HttpError as e:
            self.assertTrue(result in ustr(e))
def read_response_error(resp):
    """Return a printable '[HTTP Failed] [status: reason] body' summary for
    resp, reduced to ASCII; returns '' for a None response or on any error.
    """
    if resp is None:
        return ''
    result = ''
    try:
        summary = "[HTTP Failed] [{0}: {1}] {2}".format(
            resp.status, resp.reason, resp.read())
        # this result string is passed upstream to several methods
        # which do a raise HttpError() or a format() of some kind;
        # as a result it cannot have any unicode characters
        if PY_VERSION_MAJOR < 3:
            result = ustr(summary, encoding='ascii', errors='ignore')
        else:
            ascii_bytes = summary.encode(encoding='ascii', errors='ignore')
            result = ascii_bytes.decode(encoding='ascii', errors='ignore')
        result = textutil.replace_non_ascii(result)
    except Exception:
        logger.warn(traceback.format_exc())
    return result
def report_ext_handlers_status(self):
    """Go through handler_state dir, collect and report status"""
    vm_status = VMStatus(status="Ready", message="Guest Agent is running")

    handlers = [] if self.ext_handlers is None else self.ext_handlers.extHandlers
    for ext_handler in handlers:
        try:
            self.report_ext_handler_status(vm_status, ext_handler)
        except ExtensionError as e:
            # One failing handler must not prevent the others from reporting.
            add_event(AGENT_NAME,
                      version=CURRENT_VERSION,
                      is_success=False,
                      message=ustr(e))

    logger.verbose("Report vm agent status")
    try:
        self.protocol.report_vm_status(vm_status)
        if self.log_report:
            logger.verbose("Successfully reported vm agent status")
    except ProtocolError as e:
        message = "Failed to report vm agent status: {0}".format(e)
        add_event(AGENT_NAME,
                  version=CURRENT_VERSION,
                  is_success=False,
                  message=message)
def send_host_plugin_heartbeat(self):
    """
    Send a health signal every HOST_PLUGIN_HEARTBEAT_PERIOD. The signal is 'Healthy' when we have been able to
    communicate with HostGAPlugin at least once in the last HOST_PLUGIN_HEALTH_PERIOD.
    """
    if self.last_host_plugin_heartbeat is None:
        # Backdate the first timestamp by one period so the initial
        # heartbeat fires immediately.
        self.last_host_plugin_heartbeat = datetime.datetime.utcnow() - MonitorHandler.HOST_PLUGIN_HEARTBEAT_PERIOD

    next_beat = self.last_host_plugin_heartbeat + MonitorHandler.HOST_PLUGIN_HEARTBEAT_PERIOD
    if datetime.datetime.utcnow() < next_beat:
        return

    try:
        host_plugin = self.protocol.client.get_host_plugin()
        host_plugin.ensure_initialized()

        if host_plugin.get_health():
            self.host_plugin_errorstate.reset()
        else:
            self.host_plugin_errorstate.incr()

        # Only report unhealthy once the error state has been triggered.
        is_healthy = self.host_plugin_errorstate.is_triggered() is False
        logger.verbose("HostGAPlugin health: {0}", is_healthy)

        self.health_service.report_host_plugin_heartbeat(is_healthy)
    except Exception as e:
        msg = "Exception sending host plugin heartbeat: {0}".format(ustr(e))
        add_event(name=AGENT_NAME,
                  version=CURRENT_VERSION,
                  op=WALAEventOperation.HostPluginHeartbeat,
                  is_success=False,
                  message=msg,
                  log_event=False)

    self.last_host_plugin_heartbeat = datetime.datetime.utcnow()
def test_save_event_cleanup(self):
    """add_event must trim the event directory down to 1000 files,
    discarding the oldest events first."""
    base_timestamp = 1491004920536531
    for i in range(0, 2000):
        evt = os.path.join(self.tmp_dir, '{0}.tld'.format(ustr(base_timestamp + i)))
        with open(evt, 'w') as fh:
            fh.write('test event {0}'.format(i))

    events = os.listdir(self.tmp_dir)
    self.assertTrue(len(events) == 2000, "{0} events found, 2000 expected".format(len(events)))

    add_event('test', message='last event')

    events = sorted(os.listdir(self.tmp_dir))
    self.assertTrue(len(events) == 1000, "{0} events found, 1000 expected".format(len(events)))

    # Oldest surviving event is #1001: the first 1000 were purged, plus
    # one extra slot freed for the newly added event.
    with open(os.path.join(self.tmp_dir, events[0])) as first_fh:
        self.assertTrue('test event 1001' in first_fh.read())

    # The lexicographically last file is the event we just added.
    with open(os.path.join(self.tmp_dir, events[-1])) as last_fh:
        self.assertTrue('last event' in last_fh.read())
def set_limits(self):
    """
    Set per-hierarchy limits based on the cgroup name (agent or particular extension)
    """
    # Limits are opt-in via configuration.
    if not conf.get_cgroups_enforce_limits():
        return
    if self.name is None:
        return

    lowered_name = self.name.lower()
    for ext in conf.get_cgroups_excluded():
        if ext in lowered_name:
            logger.info('No cgroups limits for {0}'.format(self.name))
            return

    cpu_limit = self.threshold.cpu_limit
    mem_limit = self.threshold.memory_limit
    msg = '{0}: {1}% {2}mb'.format(self.name, cpu_limit, mem_limit)
    logger.info("Setting cgroups limits for {0}".format(msg))

    success = False
    try:
        self.set_cpu_limit(cpu_limit)
        self.set_memory_limit(mem_limit)
        success = True
    except Exception as ge:
        msg = '[{0}] {1}'.format(msg, ustr(ge))
        raise
    finally:
        # Imported locally to avoid a circular dependency at module load.
        from azurelinuxagent.common.event import add_event, WALAEventOperation
        add_event(AGENT_NAME,
                  version=CURRENT_VERSION,
                  op=WALAEventOperation.SetCGroupsLimits,
                  is_success=success,
                  message=msg,
                  log_event=False)
def send_host_plugin_heartbeat(self):
    """
    Send a health signal every HOST_PLUGIN_HEARTBEAT_PERIOD. The signal is 'Healthy' when we have been able to
    communicate with HostGAPlugin at least once in the last HOST_PLUGIN_HEALTH_PERIOD.
    """
    try:
        plugin = self.protocol.client.get_host_plugin()
        plugin.ensure_initialized()

        if plugin.get_health():
            self.host_plugin_errorstate.reset()
        else:
            self.host_plugin_errorstate.incr()

        # Unhealthy is only reported once the error state triggers.
        is_healthy = self.host_plugin_errorstate.is_triggered() is False
        logger.verbose("HostGAPlugin health: {0}", is_healthy)

        self.health_service.report_host_plugin_heartbeat(is_healthy)

        if not is_healthy:
            # Emit an extended event noting how long we have been unhealthy.
            add_event(name=AGENT_NAME,
                      version=CURRENT_VERSION,
                      op=WALAEventOperation.HostPluginHeartbeatExtended,
                      is_success=False,
                      message='{0} since successful heartbeat'.format(
                          self.host_plugin_errorstate.fail_time),
                      log_event=False)
    except Exception as e:  # pylint: disable=C0103
        msg = "Exception sending host plugin heartbeat: {0}".format(ustr(e))
        add_event(name=AGENT_NAME,
                  version=CURRENT_VERSION,
                  op=WALAEventOperation.HostPluginHeartbeat,
                  is_success=False,
                  message=msg,
                  log_event=False)
def start_extension_command(self, extension_name, command, shell, cwd, env, stdout, stderr):
    """
    Starts a command (install/enable/etc) for an extension and adds the command's PID to the extension's cgroup
    :param extension_name: The extension executing the command
    :param command: The command to invoke
    :param cwd: The working directory for the command
    :param env: The environment to pass to the command's process
    :param stdout: File object to redirect stdout to
    :param stderr: File object to redirect stderr to
    """
    if not self.enabled():
        # cgroups disabled: run the command directly, in its own session.
        process = subprocess.Popen(command,
                                   shell=shell,
                                   cwd=cwd,
                                   env=env,
                                   stdout=stdout,
                                   stderr=stderr,
                                   preexec_fn=os.setsid)
    else:
        process, extension_cgroups = self._cgroups_api.start_extension_command(extension_name,
                                                                               command,
                                                                               shell=shell,
                                                                               cwd=cwd,
                                                                               env=env,
                                                                               stdout=stdout,
                                                                               stderr=stderr)
        # Track each cgroup independently: previously a single failure
        # aborted the loop and left the remaining cgroups untracked.
        for cgroup in extension_cgroups:
            try:
                CGroupsTelemetry.track_cgroup(cgroup)
            except Exception as e:
                logger.warn(
                    "Cannot add cgroup '{0}' to tracking list; resource usage will not be tracked. Error: {1}"
                    .format(cgroup.path, ustr(e)))

    return process
def __parse_and_add_extension_settings(settings_node, name, ext_handler, depends_on_level,
                                       state=ExtensionState.Enabled):
    """Parse a settings XML node and append the resulting Extension objects
    to ext_handler.properties.extensions.

    :param settings_node: XML node whose text is the runtime-settings JSON
    :param name: handler name; also used as the extension name
    :param ext_handler: handler object the parsed extensions are attached to
    :param depends_on_level: dependency level propagated to each extension
    :param state: extension state applied to each extension (default Enabled)
    :raises ExtensionsConfigError: when the node carries no seqNo attribute
    """
    seq_no = getattrib(settings_node, "seqNo")
    if seq_no in (None, ""):
        raise ExtensionsConfigError(
            "SeqNo not specified for the Extension: {0}".format(name))

    try:
        runtime_settings = json.loads(gettext(settings_node))
    except ValueError as error:
        logger.error("Invalid extension settings: {0}", ustr(error))
        # Incase of invalid/no settings, add the name and seqNo of the Extension and treat it as an extension with
        # no settings since we were able to successfully parse those data properly. Without this, we wont report
        # anything for that sequence number and CRP would eventually have to timeout rather than fail fast.
        ext_handler.properties.extensions.append(
            Extension(name=name, sequenceNumber=seq_no, state=state,
                      dependencyLevel=depends_on_level))
        return

    for plugin_settings_list in runtime_settings["runtimeSettings"]:
        handler_settings = plugin_settings_list["handlerSettings"]
        ext = Extension()
        # There is no "extension name" for single Handler Settings. Use HandlerName for those
        ext.name = name
        ext.state = state
        ext.sequenceNumber = seq_no
        # publicSettings/protectedSettings may be absent; .get returns None then.
        ext.publicSettings = handler_settings.get("publicSettings")
        ext.protectedSettings = handler_settings.get("protectedSettings")
        ext.dependencyLevel = depends_on_level
        thumbprint = handler_settings.get(
            "protectedSettingsCertThumbprint")
        ext.certificateThumbprint = thumbprint
        ext_handler.properties.extensions.append(ext)
def run(self, child_args=None):
    """Daemon entry point: log environment details, then keep the daemon
    alive, restarting it 15 seconds after any unhandled exception."""
    logger.info("{0} Version:{1}", AGENT_LONG_NAME, AGENT_VERSION)
    logger.info("OS: {0} {1}", DISTRO_NAME, DISTRO_VERSION)
    logger.info("Python: {0}.{1}.{2}", PY_VERSION_MAJOR, PY_VERSION_MINOR, PY_VERSION_MICRO)

    self.check_pid()

    # If FIPS is enabled, set the OpenSSL environment variable
    # Note:
    # -- Subprocesses inherit the current environment
    if conf.get_fips_enabled():
        os.environ[OPENSSL_FIPS_ENVIRONMENT] = '1'

    while self.running:
        try:
            self.daemon(child_args)
        except Exception:
            err_msg = traceback.format_exc()
            add_event(name=AGENT_NAME,
                      is_success=False,
                      message=ustr(err_msg),
                      op=WALAEventOperation.UnhandledError)
            logger.warn("Daemon ended with exception -- Sleep 15 seconds and restart daemon")
            time.sleep(15)
def create_agent_cgroups(self):
    """Return the agent's CGroup objects (cpu and memory), derived from
    the systemd unit recorded in /proc/self/cgroup.

    :raises CGroupsException: when the cgroup paths cannot be determined
    """
    try:
        cgroup_unit = None
        for line in fileutil.read_file("/proc/self/cgroup").splitlines():
            fields = line.split(':')
            # Each line is hierarchy-id:controller-list:cgroup-path;
            # the named systemd hierarchy carries the unit path we need.
            if fields[1] == "name=systemd":
                cgroup_unit = fields[2].lstrip(os.path.sep)

        cpu_cgroup_path = os.path.join(CGROUPS_FILE_SYSTEM_ROOT, 'cpu', cgroup_unit)
        memory_cgroup_path = os.path.join(CGROUPS_FILE_SYSTEM_ROOT, 'memory', cgroup_unit)

        return [
            CGroup.create(cpu_cgroup_path, 'cpu', VM_AGENT_CGROUP_NAME),
            CGroup.create(memory_cgroup_path, 'memory', VM_AGENT_CGROUP_NAME)
        ]
    except Exception as e:
        raise CGroupsException(
            "Failed to get paths of agent's cgroups. Error: {0}".format(ustr(e)))
def poll_telemetry_metrics(self):
    """
    This method polls the tracked cgroups to get data from the cgroups filesystem and send the data directly.
    :return: List of Metrics (which would be sent to PerfCounterMetrics directly.
    """
    try:  # a reporting problem must not take down the whole monitor thread
        time_now = datetime.datetime.utcnow()
        if not self.last_cgroup_polling_telemetry:
            self.last_cgroup_polling_telemetry = time_now

        next_poll = self.last_cgroup_polling_telemetry + MonitorHandler.CGROUP_TELEMETRY_POLLING_PERIOD
        if time_now < next_poll:
            return

        metrics = CGroupsTelemetry.poll_all_tracked()
        self.last_cgroup_polling_telemetry = time_now

        for metric in metrics or []:
            report_metric(metric.category,
                          metric.counter,
                          metric.instance,
                          metric.value)
    except Exception as e:
        logger.warn("Could not poll all the tracked telemetry due to {0}", ustr(e))
def run(self):
    """Fetch the current extension handlers for this goal state, process
    them, then report their status."""
    self.ext_handlers, etag = None, None
    try:
        self.protocol = self.protocol_util.get_protocol()
        self.ext_handlers, etag = self.protocol.get_ext_handlers()
    except ProtocolError as e:
        msg = u"Exception retrieving extension handlers: {0}".format(ustr(e))
        logger.warn(msg)
        add_event(AGENT_NAME, version=CURRENT_VERSION, is_success=False, message=msg)
        return

    logger.verbose(u"Handle extensions updates for incarnation {0}".format(etag))
    # Log status report success on new config
    self.log_report = True

    self.handle_ext_handlers(etag)
    self.last_etag = etag

    self.report_ext_handlers_status()
def get_api_versions(self):
    """Query HostGAPlugin for the API versions it supports and report the
    outcome to the health service. Returns [] on failure."""
    url = URI_FORMAT_GET_API_VERSIONS.format(self.endpoint, HOST_PLUGIN_PORT)
    logger.verbose("HostGAPlugin: Getting API versions at [{0}]".format(url))

    return_val = []
    error_response = ''
    is_healthy = False
    try:
        response = restutil.http_get(url, {HEADER_CONTAINER_ID: self.container_id})
        if restutil.request_failed(response):
            error_response = restutil.read_response_error(response)
            logger.error("HostGAPlugin: Failed Get API versions: {0}".format(error_response))
        else:
            # Strip any BOM before decoding the payload.
            return_val = ustr(remove_bom(response.read()), encoding='utf-8')
            is_healthy = True
    except HttpError as e:
        logger.error("HostGAPlugin: Exception Get API versions: {0}".format(e))

    self.health_service.report_host_plugin_versions(is_healthy=is_healthy, response=error_response)

    return return_val
def get_distro():
    """Return [name, version, codename/id, full_name] for the current OS.

    FreeBSD is detected explicitly; otherwise platform.linux_distribution
    is used when available, falling back to platform.dist on very old
    Pythons. The Oracle Linux entries are patched in afterwards.
    """
    if 'FreeBSD' in platform.system():
        # Strip the suffix from e.g. "11.2-RELEASE-p4"; raw string avoids
        # the invalid '\-' escape sequence the original pattern used.
        release = re.sub(r'-.*\Z', '', ustr(platform.release()))
        osinfo = ['freebsd', release, '', 'freebsd']
    elif 'linux_distribution' in dir(platform):
        osinfo = list(
            platform.linux_distribution(
                full_distribution_name=0,
                supported_dists=platform._supported_dists + ('alpine',)))
        full_name = platform.linux_distribution()[0].strip()
        osinfo.append(full_name)
    else:
        # platform.dist() returns a tuple; convert to a list so the
        # Oracle patch below can assign by index without a TypeError.
        osinfo = list(platform.dist())

    # The platform.py lib has issue with detecting oracle linux distribution.
    # Merge the following patch provided by oracle as a temparory fix.
    if os.path.exists("/etc/oracle-release"):
        osinfo[2] = "oracle"
        osinfo[3] = "Oracle Linux"

    # Remove trailing whitespace and quote in distro name
    osinfo[0] = osinfo[0].strip('"').strip(' ').lower()
    return osinfo
def _operation(self):
    """Validate IMDS once, fold the result into the error state, and
    report the computed health; unexpected failures become telemetry."""
    try:
        healthy_now, response = self.imds_client.validate()
        if healthy_now:
            self.imds_error_state.reset()
        else:
            self.imds_error_state.incr()

        # Unhealthy is only reported after the error state triggers.
        is_healthy = self.imds_error_state.is_triggered() is False
        logger.verbose("IMDS health: {0} [{1}]", is_healthy, response)

        self.health_service.report_imds_status(is_healthy, response)
    except Exception as e:
        msg = "Exception sending imds heartbeat: {0}".format(ustr(e))
        add_event(name=AGENT_NAME,
                  version=CURRENT_VERSION,
                  op=WALAEventOperation.ImdsHeartbeat,
                  is_success=False,
                  message=msg,
                  log_event=False)
def _operation(self):
    """Send the HostGAPlugin heartbeat: refresh the plugin from the goal
    state, fold its health into the error state, and report the result.
    Unexpected failures are recorded as telemetry and then re-raised."""
    try:
        host_plugin = self.protocol.client.get_host_plugin()
        host_plugin.ensure_initialized()
        self.protocol.update_host_plugin_from_goal_state()

        if host_plugin.get_health():
            self.host_plugin_error_state.reset()
        else:
            self.host_plugin_error_state.incr()

        is_healthy = self.host_plugin_error_state.is_triggered() is False
        logger.verbose("HostGAPlugin health: {0}", is_healthy)

        self.health_service.report_host_plugin_heartbeat(is_healthy)

        if not is_healthy:
            # Surface prolonged failure with how long we've been unhealthy.
            add_event(name=AGENT_NAME,
                      version=CURRENT_VERSION,
                      op=WALAEventOperation.HostPluginHeartbeatExtended,
                      is_success=False,
                      message='{0} since successful heartbeat'.format(
                          self.host_plugin_error_state.fail_time),
                      log_event=False)
    except Exception as e:
        msg = "Exception sending host plugin heartbeat: {0}".format(ustr(e))
        add_event(name=AGENT_NAME,
                  version=CURRENT_VERSION,
                  op=WALAEventOperation.HostPluginHeartbeat,
                  is_success=False,
                  message=msg,
                  log_event=False)
        raise
def _emit_changes_in_default_configuration():
    """Log, and emit ConfigurationChange telemetry for, settings that
    deviate from the agent's defaults."""
    try:
        def log_if_int_changed_from_default(name, current):
            # Helper: emit one event when an integer setting is non-default.
            default = conf.get_int_default_value(name)
            if default != current:
                msg = "{0} changed from its default; new value: {1}".format(name, current)
                logger.info(msg)
                add_event(AGENT_NAME, op=WALAEventOperation.ConfigurationChange, message=msg)

        log_if_int_changed_from_default("Extensions.GoalStatePeriod", conf.get_goal_state_period())

        if conf.enable_firewall():
            log_if_int_changed_from_default("OS.EnableFirewallPeriod", conf.get_enable_firewall_period())
        else:
            message = "OS.EnableFirewall is False"
            logger.info(message)
            add_event(AGENT_NAME, op=WALAEventOperation.ConfigurationChange, message=message)

        if conf.get_lib_dir() != "/var/lib/waagent":
            message = "lib dir is in an unexpected location: {0}".format(conf.get_lib_dir())
            logger.info(message)
            add_event(AGENT_NAME, op=WALAEventOperation.ConfigurationChange, message=message)

    except Exception as e:
        logger.warn("Failed to log changes in configuration: {0}", ustr(e))
def _detect_wire_protocol(self):
    """Detect and return a working WireProtocol instance.

    Resolves the WireServer endpoint (cached DHCP result, fresh DHCP run,
    saved endpoint file, or the fallback endpoint returned by
    is_dhcp_available), probes it, and persists the protocol choice.

    :raises ProtocolError: if DHCP fails or the WireServer does not respond
    """
    endpoint = self.dhcp_handler.endpoint
    if endpoint is None:
        # Check if DHCP can be used to get the wire protocol endpoint
        (dhcp_available, conf_endpoint) = self.osutil.is_dhcp_available()
        if dhcp_available:
            logger.info(
                "WireServer endpoint is not found. Rerun dhcp handler")
            try:
                self.dhcp_handler.run()
            except DhcpError as e:
                raise ProtocolError(ustr(e))
            endpoint = self.dhcp_handler.endpoint
        else:
            logger.info("_detect_wire_protocol: DHCP not available")
            endpoint = self._get_wireserver_endpoint()
            if endpoint is None:  # was '== None'; identity check is the idiom
                endpoint = conf_endpoint
                logger.info("Using hardcoded WireServer endpoint {0}", endpoint)
            else:
                logger.info("WireServer endpoint {0} read from file", endpoint)

    try:
        protocol = WireProtocol(endpoint)
        protocol.detect()
        self._set_wireserver_endpoint(endpoint)
        self.save_protocol("WireProtocol")
        return protocol
    except ProtocolError as e:
        # Reset the cached endpoint so the next attempt re-resolves it.
        logger.info("WireServer is not responding. Reset endpoint")
        self.dhcp_handler.endpoint = None
        self.dhcp_handler.skip_cache = True
        raise e
def _foreach_legacy_cgroup(operation):
    """
    Previous versions of the daemon (2.2.31-2.2.40) wrote their PID to /sys/fs/cgroup/{cpu,memory}/WALinuxAgent/WALinuxAgent;
    starting from version 2.2.41 we track the agent service in walinuxagent.service instead of WALinuxAgent/WALinuxAgent. Also,
    when running under systemd, the PIDs should not be explicitly moved to the cgroup filesystem. The older daemons would
    incorrectly do that under certain conditions.

    This method checks for the existence of the legacy cgroups and, if the daemon's PID has been added to them, executes the
    given operation on the cgroups. After this check, the method attempts to remove the legacy cgroups.

    :param operation: The function to execute on each legacy cgroup. It must take 2 arguments: the controller and the daemon's PID
    :return: the number of legacy cgroups found
    """
    legacy_cgroups = []
    for controller in ['cpu', 'memory']:
        cgroup = os.path.join(CGROUPS_FILE_SYSTEM_ROOT, controller, "WALinuxAgent", "WALinuxAgent")
        if os.path.exists(cgroup):
            logger.info('Found legacy cgroup {0}', cgroup)
            legacy_cgroups.append((controller, cgroup))

    try:
        for controller, cgroup in legacy_cgroups:
            procs_file = os.path.join(cgroup, "cgroup.procs")

            if os.path.exists(procs_file):
                procs_file_contents = fileutil.read_file(procs_file).strip()
                daemon_pid = CGroupsApi.get_daemon_pid()

                # Only invoke the operation when the daemon's PID was
                # actually written into the legacy cgroup.
                if ustr(daemon_pid) in procs_file_contents:
                    operation(controller, daemon_pid)
    finally:
        # Always attempt cleanup, even if the operation above failed.
        for _, cgroup in legacy_cgroups:
            logger.info('Removing {0}', cgroup)
            shutil.rmtree(cgroup, ignore_errors=True)
    return len(legacy_cgroups)
def add_metric(self, category, counter, instance, value, log_event=False):
    """
    Create and save an event which contains a telemetry event.

    :param str category: The category of metric (e.g. "cpu", "memory")
    :param str counter: The specific metric within the category (e.g. "%idle")
    :param str instance: For instanced metrics, the instance identifier (filesystem name, cpu core#, etc.)
    :param value: Value of the metric
    :param bool log_event: If true, log the collected metric in the agent log
    """
    if log_event:
        message = "Metric {0}/{1} [{2}] = {3}".format(
            category, counter, instance, value)
        _log_event(AGENT_NAME, "METRIC", message, 0)

    # Each metric field becomes one event parameter; strings are
    # normalized via str_to_encoded_ustr, the value is always a float.
    event = TelemetryEvent(TELEMETRY_METRICS_EVENT_ID, TELEMETRY_EVENT_PROVIDER_ID)
    event.parameters.append(
        TelemetryEventParam(GuestAgentPerfCounterEventsSchema.Category, str_to_encoded_ustr(category)))
    event.parameters.append(
        TelemetryEventParam(GuestAgentPerfCounterEventsSchema.Counter, str_to_encoded_ustr(counter)))
    event.parameters.append(
        TelemetryEventParam(GuestAgentPerfCounterEventsSchema.Instance, str_to_encoded_ustr(instance)))
    event.parameters.append(
        TelemetryEventParam(GuestAgentPerfCounterEventsSchema.Value, float(value)))
    self.add_common_event_parameters(event, datetime.utcnow())

    data = get_properties(event)
    try:
        self.save_event(json.dumps(data))
    except EventError as e:
        # Saving is best-effort; failures are logged periodically to avoid spam.
        logger.periodic_error(logger.EVERY_FIFTEEN_MINUTES, "[PERIODIC] {0}".format(ustr(e)))
def asn1_to_ssh(self, pubkey):
    """Convert a PEM/ASN.1 RSA public key into OpenSSH 'ssh-rsa' format.

    :param pubkey: PEM text of the public key
    :return: a unicode 'ssh-rsa <base64>' line, newline-terminated
    :raises CryptError: if pyasn1 cannot be imported
    """
    # Drop the '-----BEGIN/END-----' armor lines, keep the base64 body.
    base64_encoded = "".join(line for line in pubkey.split("\n")
                             if not line.startswith("----"))
    try:
        #TODO remove pyasn1 dependency
        from pyasn1.codec.der import decoder as der_decoder

        der_encoded = base64.b64decode(base64_encoded)
        der_encoded = der_decoder.decode(der_encoded)[0][1]
        key = der_decoder.decode(self.bits_to_bytes(der_encoded))[0]
        n, e = key[0], key[1]

        # Build the SSH wire format: length-prefixed "ssh-rsa", exponent,
        # then modulus (the modulus gets a leading zero byte).
        keydata = bytearray()
        keydata.extend(struct.pack('>I', len("ssh-rsa")))
        keydata.extend(b"ssh-rsa")
        e_bytes = self.num_to_bytes(e)
        keydata.extend(struct.pack('>I', len(e_bytes)))
        keydata.extend(e_bytes)
        n_bytes = self.num_to_bytes(n)
        keydata.extend(struct.pack('>I', len(n_bytes) + 1))
        keydata.extend(b"\0")
        keydata.extend(n_bytes)

        keydata_base64 = base64.b64encode(bytebuffer(keydata))
        return ustr(b"ssh-rsa " + keydata_base64 + b"\n", encoding='utf-8')
    except ImportError:
        raise CryptError("Failed to load pyasn1.codec.der")
def __run_command(command_action, command, log_error, encode_output):
    """
    Executes the given command_action and returns its stdout. The command_action is a function that executes a command/pipe and returns its exit code, stdout, and stderr.

    If there are any errors executing the command it raises a RunCommandException; if 'log_error' is True, it also logs details about the error.

    If encode_output is True the stdout is returned as a string, otherwise it is returned as a bytes object.
    """
    try:
        return_code, stdout, stderr = command_action()

        if encode_output:
            stdout = __encode_command_output(stdout)
            stderr = __encode_command_output(stderr)

        # A non-zero exit code is surfaced as a CommandError carrying the
        # full context (command, code, stdout, stderr).
        if return_code != 0:
            if log_error:
                logger.error(
                    "Command: [{0}], return code: [{1}], stdout: [{2}] stderr: [{3}]",
                    __format_command(command),
                    return_code,
                    stdout,
                    stderr)
            raise CommandError(command=__format_command(command), return_code=return_code, stdout=stdout, stderr=stderr)

        return stdout

    except CommandError:
        # Already carries full context; re-raise untouched.
        raise
    except Exception as exception:
        if log_error:
            logger.error(u"Command [{0}] raised unexpected exception: [{1}]",
                         __format_command(command), ustr(exception))
        raise
def __log_network_setup_service_logs(self):
    """Fetch the network-setup service's journal since boot, log it, and
    emit a telemetry event reflecting whether the service failed."""
    # Get logs from journalctl - https://www.freedesktop.org/software/systemd/man/journalctl.html
    journalctl_cmd = ["journalctl", "-u", self._network_setup_service_name, "-b"]
    service_failed = self.__verify_network_setup_service_failed()
    try:
        service_logs = shellutil.run_command(journalctl_cmd)
        msg = ustr("Logs from the {0} since system boot:\n {1}").format(
            self._network_setup_service_name, service_logs)
        logger.info(msg)
    except CommandError as error:
        msg = "Unable to fetch service logs, Command: {0} failed with ExitCode: {1}\nStdout: {2}\nStderr: {3}".format(
            ' '.join(journalctl_cmd), error.returncode, error.stdout, error.stderr)
        logger.warn(msg)
    except Exception:
        msg = "Ran into unexpected error when getting logs for {0} service. Error: {1}".format(
            self._network_setup_service_name, traceback.format_exc())
        logger.warn(msg)

    # Log service status and logs if we can fetch them from journalctl and send it to Kusto,
    # else just log the error of the failure of fetching logs
    add_event(op=WALAEventOperation.PersistFirewallRules,
              is_success=(not service_failed),
              message=msg,
              log_event=False)
def _ensure_firewall_rules_persisted(dst_ip):
    """Set up persistent firewall rules for dst_ip and emit a telemetry
    event with the outcome; no-op when the firewall is disabled."""
    if not conf.enable_firewall():
        logger.info("Not setting up persistent firewall rules as OS.EnableFirewall=False")
        return

    is_success = False
    logger.info("Starting setup for Persistent firewall rules")
    try:
        PersistFirewallRulesHandler(dst_ip=dst_ip, uid=os.getuid()).setup()
        msg = "Persistent firewall rules setup successfully"
        is_success = True
        logger.info(msg)
    except Exception as error:
        # Best-effort: the failure is reported, not propagated.
        msg = "Unable to setup the persistent firewall rules: {0}".format(ustr(error))
        logger.error(msg)

    add_event(op=WALAEventOperation.PersistFirewallRules,
              is_success=is_success,
              message=msg,
              log_event=False)
def send_imds_heartbeat(self):
    """
    Send a health signal every IMDS_HEARTBEAT_PERIOD. The signal is 'Healthy' when we have successfully called and
    validated a response in the last IMDS_HEALTH_PERIOD.
    """
    if self.last_imds_heartbeat is None:
        # Backdate the first timestamp so the initial heartbeat fires now.
        self.last_imds_heartbeat = datetime.datetime.utcnow() - MonitorHandler.IMDS_HEARTBEAT_PERIOD

    next_beat = self.last_imds_heartbeat + MonitorHandler.IMDS_HEARTBEAT_PERIOD
    if datetime.datetime.utcnow() < next_beat:
        return

    try:
        healthy_now, response = self.imds_client.validate()
        if healthy_now:
            self.imds_errorstate.reset()
        else:
            self.imds_errorstate.incr()

        # Unhealthy is only reported once the error state has triggered.
        is_healthy = self.imds_errorstate.is_triggered() is False
        logger.verbose("IMDS health: {0} [{1}]", is_healthy, response)

        self.health_service.report_imds_status(is_healthy, response)
    except Exception as e:
        msg = "Exception sending imds heartbeat: {0}".format(ustr(e))
        add_event(name=AGENT_NAME,
                  version=CURRENT_VERSION,
                  op=WALAEventOperation.ImdsHeartbeat,
                  is_success=False,
                  message=msg,
                  log_event=False)

    self.last_imds_heartbeat = datetime.datetime.utcnow()
def assert_ensure_initialized(self, patch_event, patch_http_get, patch_report_health,
                              response_body,
                              response_status_code, should_initialize,
                              should_report_healthy):
    """Drive HostPluginProtocol.ensure_initialized() against a mocked HTTP
    response and verify the initialization, telemetry and health-report
    side effects."""
    host = hostplugin.HostPluginProtocol(endpoint='ws', container_id='cid', role_config_name='rcf')
    host.is_initialized = False
    patch_http_get.return_value = MockResponse(body=response_body,
                                               reason='reason',
                                               status_code=response_status_code)

    self.assertEqual(host.ensure_initialized(), host.is_available)
    self.assertEqual(should_initialize, host.is_initialized)

    # Exactly one telemetry event, for the InitializeHostPlugin operation.
    self.assertEqual(1, patch_event.call_count)
    self.assertEqual('InitializeHostPlugin', patch_event.call_args[0][0])
    self.assertEqual(should_initialize, patch_event.call_args[1]['is_success'])

    # Health is reported exactly once; the response detail depends on success.
    self.assertEqual(1, patch_report_health.call_count)
    self.assertEqual(should_report_healthy, patch_report_health.call_args[1]['is_healthy'])

    actual_response = patch_report_health.call_args[1]['response']
    if should_initialize:
        self.assertEqual('', actual_response)
    else:
        self.assertTrue('HTTP Failed' in actual_response)
        self.assertTrue(response_body in actual_response)
        self.assertTrue(ustr(response_status_code) in actual_response)