def run(self):
    """Run the promise from inside the partition folder.

    The promise module is loaded first (which will update the process
    ``sys.path``), unless ``self.wrap_promise`` is set, in which case a
    ``WrapPromise`` is instantiated instead.  The promise cache is then
    updated, recording whether this execution counts as started, and the
    promise is run with ``self.check_anomaly`` and ``self.allow_bang``.

    Any exception is logged with its traceback and re-raised.
    """
    try:
        os.chdir(self.partition_folder)
        if self.uid and self.gid:
            dropPrivileges(self.uid, self.gid, logger=self.logger)
        mkdir_p(self.cache_folder)

        # Pick the class once; it is needed both to build the instance and
        # to update the promise cache.
        if self.wrap_promise:
            promise_class = WrapPromise
        else:
            self._createInitFile()
            promise_class = self._loadPromiseModule().RunPromise
        promise_instance = promise_class(self.argument_dict)

        # If the promise will run, the execution timestamp is saved.
        has_check_interface = (
            hasattr(promise_instance, 'isAnomalyDetected')
            and hasattr(promise_instance, 'isTested')
        )
        if not has_check_interface:
            promise_started = True
        elif promise_instance.isAnomalyDetected() and self.check_anomaly:
            promise_started = True
        elif promise_instance.isTested() and not self.check_anomaly:
            promise_started = True
        else:
            promise_started = False

        self.updatePromiseCache(
            promise_class,
            promise_instance,
            started=promise_started,
        )
        promise_instance.run(self.check_anomaly, self.allow_bang)
    except Exception:
        self.logger.error(traceback.format_exc())
        raise
def _checkPromises(self, computer_partition):
    """Execute every promise found in the partition's ``etc/promise``.

    Each promise is started with ``dropPrivileges`` applied for the uid and
    gid owning the partition directory, then polled every tenth of a second
    until it exits or ``self.promise_timeout`` is exhausted.

    Raises:
        Slapgrid.PromiseError: when a promise exits with a non-zero status,
            or when it is still running once the timeout is reached (it is
            then terminated).
    """
    self.logger.info("Checking promises...")
    instance_path = os.path.join(self.instance_root,
                                 computer_partition.getId())

    # stat sys call to get the owner of the partition directory
    owner_info = os.stat(instance_path)
    uid = owner_info.st_uid
    gid = owner_info.st_gid

    # Poll every tenth of a second, but give up after promise_timeout.
    sleep_time = 0.1

    promise_present = False
    promise_dir = os.path.join(instance_path, 'etc', 'promise')
    if os.path.isdir(promise_dir):
        # Check whether every promise is kept
        for entry in os.listdir(promise_dir):
            promise_present = True
            command = [os.path.join(promise_dir, entry)]
            promise = os.path.basename(command[0])
            self.logger.info("Checking promise '%s'.", promise)

            if sys.platform == 'cygwin':
                promise_env = None
            else:
                promise_env = {}
            process_handler = subprocess.Popen(
                command,
                preexec_fn=lambda: dropPrivileges(uid, gid,
                                                  logger=self.logger),
                cwd=instance_path,
                env=promise_env,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                stdin=subprocess.PIPE)
            process_handler.stdin.flush()
            process_handler.stdin.close()
            process_handler.stdin = None

            increment_limit = int(self.promise_timeout / sleep_time)
            exit_status = None
            for _ in range(increment_limit):
                exit_status = process_handler.poll()
                if exit_status is not None:
                    break
                time.sleep(sleep_time)

            if exit_status is None:
                # Every polling slot was used and the promise still runs.
                process_handler.terminate()
                raise Slapgrid.PromiseError(
                    "The promise '%s' timed out" % promise)

            if exit_status != 0:
                stderr = process_handler.communicate()[1]
                if stderr is None:
                    stderr = "No error output from '%s'." % promise
                else:
                    stderr = "Promise '%s':" % promise + stderr
                raise Slapgrid.PromiseError(stderr)
            # Success! Move on to the next promise.

    if not promise_present:
        self.logger.info("No promise.")
def _checkPromises(self, computer_partition):
    """Execute every promise found in the partition's ``etc/promise``.

    Each promise is started with ``dropPrivileges`` applied for the uid and
    gid owning the partition directory.  The process is polled until it
    exits, so a promise that finishes quickly no longer blocks the run for
    the whole ``self.promise_timeout``.

    Raises:
        Slapgrid.PromiseError: when a promise exits with a non-zero status,
            or when it is still running after ``self.promise_timeout``
            seconds (it is then terminated).
    """
    self.logger.info("Checking promises...")
    instance_path = os.path.join(self.instance_root,
                                 computer_partition.getId())

    # stat sys call to get the owner of the partition directory
    stat_info = os.stat(instance_path)
    uid = stat_info.st_uid
    gid = stat_info.st_gid

    # Delay between two checks of a running promise.
    poll_interval = 0.1

    promise_present = False
    promise_dir = os.path.join(instance_path, 'etc', 'promise')
    if os.path.isdir(promise_dir):
        # Check whether every promise is kept
        for promise in os.listdir(promise_dir):
            promise_present = True
            command = [os.path.join(promise_dir, promise)]
            promise = os.path.basename(command[0])
            self.logger.info("Checking promise %r.", promise)

            process_handler = subprocess.Popen(
                command,
                preexec_fn=lambda: dropPrivileges(uid, gid,
                                                  logger=self.logger),
                cwd=instance_path,
                # Empty environment everywhere except on cygwin, where the
                # parent environment is inherited.
                env=None if sys.platform == 'cygwin' else {},
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                stdin=subprocess.PIPE)
            process_handler.stdin.flush()
            process_handler.stdin.close()
            process_handler.stdin = None

            # Wait for the promise to finish, checking regularly instead
            # of sleeping for the full timeout up front.
            deadline = time.time() + self.promise_timeout
            while process_handler.poll() is None:
                remaining = deadline - time.time()
                if remaining <= 0:
                    process_handler.terminate()
                    raise Slapgrid.PromiseError(
                        "The promise %r timed out" % promise)
                time.sleep(min(poll_interval, remaining))

            if process_handler.poll() != 0:
                stderr = process_handler.communicate()[1]
                if stderr is None:
                    stderr = 'No error output from %r.' % promise
                else:
                    stderr = 'Promise %r:' % promise + stderr
                raise Slapgrid.PromiseError(stderr)

    if not promise_present:
        self.logger.info("No promise.")
def destroy(self):
    """Destroy the partition and make it available for subsequent use.

    Runs the partition's ``sbin/destroy`` executable when it exists, then
    removes the key and certificate files, everything found directly inside
    ``self.instance_path`` and the supervisord configuration of the
    partition, and finally calls ``self.updateSupervisor()``.

    Raises:
        subprocess.CalledProcessError: when the ``destroy`` executable does
            not exit with status 0.
        IOError: when an I/O error happens while freeing the partition.
    """
    self.logger.info("Destroying Computer Partition %s..."
                     % self.computer_partition.getId())

    # Launches "destroy" binary if exists
    destroy_executable_location = os.path.join(self.instance_path, 'sbin',
                                               'destroy')
    if os.path.exists(destroy_executable_location):
        uid, gid = self.getUserGroupId()
        self.logger.debug('Invoking %r' % destroy_executable_location)
        process_handler = SlapPopen(
            [destroy_executable_location],
            preexec_fn=lambda: dropPrivileges(uid, gid, logger=self.logger),
            cwd=self.instance_path,
            env=getCleanEnvironment(logger=self.logger,
                                    home_path=pwd.getpwuid(uid).pw_dir),
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            logger=self.logger)
        if process_handler.returncode is None or \
                process_handler.returncode != 0:
            message = 'Failed to destroy Computer Partition in %r.' % \
                self.instance_path
            self.logger.error(message)
            # CalledProcessError takes (returncode, cmd, output).  Passing
            # the message as return code makes the exception impossible to
            # render as text, so hand over the real values; the message
            # itself has just been logged.
            returncode = process_handler.returncode
            if returncode is None:
                # No exit status available: report a generic failure.
                returncode = 1
            raise subprocess.CalledProcessError(
                returncode,
                [destroy_executable_location],
                process_handler.output)

    # Manually cleans what remains
    try:
        for path in (self.key_file, self.cert_file):
            if path and os.path.exists(path):
                os.unlink(path)

        # better to manually remove symlinks because rmtree might choke
        # on them
        sr_symlink = os.path.join(self.instance_path, 'software_release')
        if os.path.islink(sr_symlink):
            os.unlink(sr_symlink)

        # Empty the partition directory itself.  Only the top level needs
        # to be listed: rmtree takes care of whole sub-trees, and symlinks
        # (even those pointing to directories) are unlinked, never followed.
        if os.path.isdir(self.instance_path):
            for entry in os.listdir(self.instance_path):
                entry_path = os.path.join(self.instance_path, entry)
                if os.path.isdir(entry_path) and \
                        not os.path.islink(entry_path):
                    shutil.rmtree(entry_path)
                else:
                    os.remove(entry_path)

        if os.path.exists(self.supervisord_partition_configuration_path):
            os.remove(self.supervisord_partition_configuration_path)
        self.updateSupervisor()
    except IOError as exc:
        raise IOError("I/O error while freeing partition (%s): %s"
                      % (self.instance_path, exc))
def agregateAndSendUsage(self):
    """Aggregate usage from each Computer Partition and send it.

    The work is done in four steps:

    1. run every script of each partition's ``etc/report`` directory,
       giving it the path of an XML report to write in ``var/xml_report``;
    2. read back the XML reports of each partition, keep the ones that
       validate against the partition consumption model and remove them
       from disk;
    3. build the aggregated report with ``self.asXML``, validate it against
       the computer consumption model and send it with ``reportUsage``;
    4. stop and destroy every partition in destroyed state, except those
       whose usage could not be reported.

    Returns:
        SLAPGRID_SUCCESS, or SLAPGRID_FAIL when a report script failed or
        a partition could not be destroyed.
    """
    # Prepares environment
    self.checkEnvironmentAndCreateStructure()
    self._launchSupervisord()

    slap_computer_usage = self.slap.registerComputer(self.computer_id)
    computer_partition_usage_list = []
    self.logger.info('Aggregating and sending usage reports...')

    # We retrieve XSD models
    try:
        computer_consumption_model = pkg_resources.resource_string(
            'slapos.slap',
            'doc/computer_consumption.xsd')
    except IOError:
        computer_consumption_model = pkg_resources.resource_string(
            __name__,
            '../../../../slapos/slap/doc/computer_consumption.xsd')

    try:
        partition_consumption_model = pkg_resources.resource_string(
            'slapos.slap',
            'doc/partition_consumption.xsd')
    except IOError:
        partition_consumption_model = pkg_resources.resource_string(
            __name__,
            '../../../../slapos/slap/doc/partition_consumption.xsd')

    clean_run = True
    # Loop over the different computer partitions
    computer_partition_list = self.FilterComputerPartitionList(
        slap_computer_usage.getComputerPartitionList())

    for computer_partition in computer_partition_list:
        try:
            computer_partition_id = computer_partition.getId()

            # We want to execute all the script in the report folder
            instance_path = os.path.join(self.instance_root,
                                         computer_partition_id)
            report_path = os.path.join(instance_path, 'etc', 'report')
            if os.path.isdir(report_path):
                script_list_to_run = os.listdir(report_path)
            else:
                script_list_to_run = []

            # We now generate the pseudorandom name for the xml file and
            # we add it in the invocation_list.  The temporary file is only
            # a source of unique names, so it is closed right away.
            with tempfile.NamedTemporaryFile() as name_source:
                name_xml = '%s.%s' % ('slapreport',
                                      os.path.basename(name_source.name))
            path_to_slapreport = os.path.join(instance_path, 'var',
                                              'xml_report', name_xml)

            failed_script_list = []
            for script in script_list_to_run:
                invocation_list = [
                    os.path.join(instance_path, 'etc', 'report', script),
                    path_to_slapreport,
                ]

                # Dropping privileges: stat sys call to get the owner of
                # the partition directory
                stat_info = os.stat(instance_path)
                uid = stat_info.st_uid
                gid = stat_info.st_gid
                process_handler = SlapPopen(
                    invocation_list,
                    preexec_fn=lambda: dropPrivileges(uid, gid,
                                                      logger=self.logger),
                    cwd=os.path.join(instance_path, 'etc', 'report'),
                    env=None,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT,
                    logger=self.logger)
                if process_handler.returncode is None:
                    process_handler.kill()
                if process_handler.returncode != 0:
                    clean_run = False
                    failed_script_list.append("Script %r failed." % script)
                    self.logger.warning('Failed to run %r' % invocation_list)

            # Report all the failures of this partition in one go.
            if failed_script_list:
                computer_partition.error('\n'.join(failed_script_list),
                                         logger=self.logger)
        # Whatever happens, don't stop processing other instances
        except Exception:
            self.logger.exception('Cannot run usage script(s) for %r:'
                                  % computer_partition.getId())

    # Now we loop through the different computer partitions to report
    report_usage_issue_cp_list = []
    # (partition id, partition) pairs that contributed at least one usage
    # report; needed to flag all of them if sending the report fails.
    usage_partition_list = []
    for computer_partition in computer_partition_list:
        try:
            filename_delete_list = []
            usage_registered = False
            computer_partition_id = computer_partition.getId()
            instance_path = os.path.join(self.instance_root,
                                         computer_partition_id)
            dir_reports = os.path.join(instance_path, 'var', 'xml_report')
            # The directory xml_report contain a number of files equal
            # to the number of software instance running inside the same
            # partition
            if os.path.isdir(dir_reports):
                filename_list = os.listdir(dir_reports)
            else:
                filename_list = []

            for filename in filename_list:
                file_path = os.path.join(dir_reports, filename)
                if not os.path.exists(file_path):
                    self.logger.debug('Usage report %r not found, ignored'
                                      % file_path)
                    continue

                with open(file_path, 'r') as report_file:
                    usage = report_file.read()

                # We check the validity of xml content of each reports
                if not self.validateXML(usage, partition_consumption_model):
                    self.logger.info(
                        'WARNING: The XML file %s generated by slapreport is '
                        'not valid - This report is left as is at %s where '
                        'you can inspect what went wrong '
                        % (filename, dir_reports))
                    # Warn the SlapOS Master that a partition generates
                    # corrupted xml report
                    continue

                computer_partition_usage = \
                    self.slap.registerComputerPartition(
                        self.computer_id, computer_partition_id)
                computer_partition_usage.setUsage(usage)
                computer_partition_usage_list.append(computer_partition_usage)
                filename_delete_list.append(filename)
                if not usage_registered:
                    usage_partition_list.append(
                        (computer_partition_id, computer_partition))
                    usage_registered = True

            # Remove all the valid xml reports now that they are part of
            # the aggregated usage list
            for filename in filename_delete_list:
                os.remove(os.path.join(dir_reports, filename))

        # Whatever happens, don't stop processing other instances
        except Exception:
            self.logger.exception('Cannot run usage script(s) for %r:'
                                  % computer_partition.getId())

    for computer_partition_usage in computer_partition_usage_list:
        self.logger.info('computer_partition_usage_list: %s - %s'
                         % (computer_partition_usage.usage,
                            computer_partition_usage.getId()))

    # If there is, at least, one report
    if computer_partition_usage_list:
        try:
            # We generate the final XML report with asXML method
            computer_consumption = self.asXML(computer_partition_usage_list)

            self.logger.info('Final xml report: %s' % computer_consumption)

            # We test the XML report before sending it
            if self.validateXML(computer_consumption,
                                computer_consumption_model):
                self.logger.info('XML file generated by asXML is valid')
                slap_computer_usage.reportUsage(computer_consumption)
            else:
                self.logger.info('XML file generated by asXML is not valid !')
                raise ValueError('XML file generated by asXML is not valid !')
        except Exception:
            # The aggregated report covers every partition that provided a
            # usage, so all of them are flagged, not just whichever
            # partition the previous loop happened to end on.
            error_trace = traceback.format_exc()
            for failed_id, failed_partition in usage_partition_list:
                issue = "Cannot report usage for %r: %s" % (failed_id,
                                                            error_trace)
                self.logger.info(issue)
                failed_partition.error(issue, logger=self.logger)
                report_usage_issue_cp_list.append(failed_id)

    for computer_partition in computer_partition_list:
        if computer_partition.getState() != \
                COMPUTER_PARTITION_DESTROYED_STATE:
            continue
        try:
            computer_partition_id = computer_partition.getId()
            try:
                software_url = \
                    computer_partition.getSoftwareRelease().getURI()
                software_path = os.path.join(self.software_root,
                                             md5digest(software_url))
            except (NotFoundError, TypeError):
                software_url = None
                software_path = None

            local_partition = Partition(
                software_path=software_path,
                instance_path=os.path.join(self.instance_root,
                                           computer_partition.getId()),
                supervisord_partition_configuration_path=os.path.join(
                    self.supervisord_configuration_directory,
                    '%s.conf' % computer_partition_id),
                supervisord_socket=self.supervisord_socket,
                computer_partition=computer_partition,
                computer_id=self.computer_id,
                partition_id=computer_partition_id,
                server_url=self.master_url,
                software_release_url=software_url,
                certificate_repository_path=self.certificate_repository_path,
                buildout=self.buildout,
                logger=self.logger)
            local_partition.stop()
            try:
                computer_partition.stopped()
            except (SystemExit, KeyboardInterrupt):
                computer_partition.error(traceback.format_exc(),
                                         logger=self.logger)
                raise
            except Exception:
                # Best effort: failing to notify "stopped" must not
                # prevent the destruction.
                pass
            if computer_partition.getId() in report_usage_issue_cp_list:
                self.logger.info(
                    'Ignoring destruction of %r, as no report usage was sent'
                    % computer_partition.getId())
                continue
            local_partition.destroy()
        except (SystemExit, KeyboardInterrupt):
            computer_partition.error(traceback.format_exc(),
                                     logger=self.logger)
            raise
        except Exception:
            clean_run = False
            self.logger.exception('')
            exc = traceback.format_exc()
            computer_partition.error(exc, logger=self.logger)
        try:
            computer_partition.destroyed()
        except NotFoundError:
            self.logger.debug(
                'Ignored slap error while trying to inform about '
                'destroying not fully configured Computer Partition %r'
                % computer_partition.getId())
        except ServerError as server_error:
            self.logger.debug(
                'Ignored server error while trying to inform about '
                'destroying Computer Partition %r. Error is:\n%r'
                % (computer_partition.getId(), server_error.args[0]))

    self.logger.info('Finished usage reports.')

    # Return success value
    if not clean_run:
        return SLAPGRID_FAIL
    return SLAPGRID_SUCCESS
def destroy(self):
    """Destroy the partition and make it available for subsequent use.

    The retention lock date is created first; when
    ``self.checkRetentionIsAuthorized()`` refuses, nothing else is done.
    Otherwise the partition's ``sbin/destroy`` executable is run when it
    exists, then the key and certificate files, the data storage locations
    of the partition, the content of ``self.instance_path`` and the
    supervisord configuration of the partition are removed.

    Returns:
        False when retention forbids the destruction, True once the
        partition has been destroyed.

    Raises:
        subprocess.CalledProcessError: when the ``destroy`` executable does
            not exit with status 0.
        IOError: when an I/O error happens while freeing the partition.
    """
    self.logger.info("Destroying Computer Partition %s..."
                     % self.computer_partition.getId())

    self.createRetentionLockDate()
    if not self.checkRetentionIsAuthorized():
        return False

    # Launches "destroy" binary if exists
    destroy_executable_location = os.path.join(self.instance_path, 'sbin',
                                               'destroy')
    if os.path.exists(destroy_executable_location):
        uid, gid = self.getUserGroupId()
        self.logger.debug('Invoking %r' % destroy_executable_location)
        process_handler = SlapPopen(
            [destroy_executable_location],
            preexec_fn=lambda: dropPrivileges(uid, gid, logger=self.logger),
            cwd=self.instance_path,
            env=getCleanEnvironment(logger=self.logger,
                                    home_path=pwd.getpwuid(uid).pw_dir),
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            logger=self.logger)
        if process_handler.returncode is None or \
                process_handler.returncode != 0:
            message = 'Failed to destroy Computer Partition in %r.' % \
                self.instance_path
            self.logger.error(message)
            # CalledProcessError takes (returncode, cmd, output).  Passing
            # the message as return code makes the exception impossible to
            # render as text, so hand over the real values; the message
            # itself has just been logged.
            returncode = process_handler.returncode
            if returncode is None:
                # No exit status available: report a generic failure.
                returncode = 1
            raise subprocess.CalledProcessError(
                returncode,
                [destroy_executable_location],
                process_handler.output)

    # Manually cleans what remains
    try:
        for path in (self.key_file, self.cert_file):
            if path and os.path.exists(path):
                os.unlink(path)

        # better to manually remove symlinks because rmtree might choke
        # on them
        sr_symlink = os.path.join(self.instance_path, 'software_release')
        if os.path.islink(sr_symlink):
            os.unlink(sr_symlink)

        # Cleanup all Data storage location of this partition
        data_base_link = os.path.join(self.instance_path,
                                      CP_STORAGE_FOLDER_NAME)
        if self.instance_storage_home and os.path.exists(data_base_link) \
                and os.path.isdir(data_base_link):
            for filename in os.listdir(data_base_link):
                data_symlink = os.path.join(data_base_link, filename)
                partition_data_path = os.path.join(
                    self.instance_storage_home, filename, self.partition_id)
                if os.path.lexists(data_symlink):
                    os.unlink(data_symlink)
                if os.path.exists(partition_data_path):
                    self.cleanupFolder(partition_data_path)

        self.cleanupFolder(self.instance_path)

        if os.path.exists(self.supervisord_partition_configuration_path):
            os.remove(self.supervisord_partition_configuration_path)
        self.updateSupervisor()
    except IOError as exc:
        raise IOError("I/O error while freeing partition (%s): %s"
                      % (self.instance_path, exc))

    return True
def install(self):
    """Create the buildout profile of the partition and run buildout.

    Creates the configuration file from the template found in
    ``self.software_path``, then installs the software partition with the
    help of buildout, generates the supervisor configuration and creates
    the retention lock delay.

    Raises:
        PathDoesNotExistError: when the partition directory is missing.
        WrongPermissionError: when the partition directory does not have
            REQUIRED_COMPUTER_PARTITION_PERMISSION.
        IOError: when the Software Release or its instance template is
            missing.
        ValueError: when more than one bootstrap candidate is found.
        BuildoutFailedError: when the bootstrap script fails.
    """
    self.logger.info("Installing Computer Partition %s..."
                     % self.computer_partition.getId())

    self.check_free_space()

    # Checks existence and permissions of Partition directory
    # Note : Partitions have to be created and configured before running
    # slapgrid
    if not os.path.isdir(self.instance_path):
        raise PathDoesNotExistError('Please create partition directory %s'
                                    % self.instance_path)

    sr_symlink = os.path.join(self.instance_path, 'software_release')
    self.updateSymlink(sr_symlink, self.software_path)

    instance_stat_info = os.stat(self.instance_path)
    permission = stat.S_IMODE(instance_stat_info.st_mode)
    if permission != REQUIRED_COMPUTER_PARTITION_PERMISSION:
        raise WrongPermissionError(
            'Wrong permissions in %s: actual '
            'permissions are: 0%o, wanted are 0%o'
            % (self.instance_path, permission,
               REQUIRED_COMPUTER_PARTITION_PERMISSION))

    # NOTE(review): this rebinds os.environ to whatever
    # getCleanEnvironment returns instead of updating it in place -
    # confirm this is intended before touching it.
    os.environ = getCleanEnvironment(
        logger=self.logger,
        home_path=pwd.getpwuid(instance_stat_info.st_uid).pw_dir)

    # Check that Software Release directory is present
    if not os.path.exists(self.software_path):
        # XXX What should it raise?
        raise IOError('Software Release %s is not present on system.\n'
                      'Cannot deploy instance.' % self.software_release_url)

    # Generate buildout instance profile from template in Software Release
    template_location = os.path.join(self.software_path, 'instance.cfg')
    if not os.path.exists(template_location):
        # Backward compatibility: "instance.cfg" file was named
        # "template.cfg".
        legacy_template_location = os.path.join(self.software_path,
                                                'template.cfg')
        if os.path.exists(legacy_template_location):
            template_location = legacy_template_location
        else:
            # No template: Software Release is either inconsistent or not
            # correctly installed.
            # XXX What should it raise?
            raise IOError(
                'Software Release %s is not correctly installed.\n'
                'Missing file: %s'
                % (self.software_release_url, template_location))

    config_location = os.path.join(self.instance_path, 'buildout.cfg')
    self.logger.debug("Copying %r to %r"
                      % (template_location, config_location))
    shutil.copy(template_location, config_location)

    # fill generated buildout with additional information
    with open(config_location) as config_file:
        buildout_text = config_file.read()
    buildout_text += '\n\n' + pkg_resources.resource_string(
        __name__, 'templates/buildout-tail.cfg.in') % {
            'computer_id': self.computer_id,
            'partition_id': self.partition_id,
            'server_url': self.server_url,
            'software_release_url': self.software_release_url,
            'key_file': self.key_file,
            'cert_file': self.cert_file,
            'storage_home': self.instance_storage_home,
            'global_ipv4_network_prefix': self.ipv4_global_network,
        }
    # Closing the file explicitly guarantees the profile is fully written
    # before its mode is changed and buildout reads it.
    with open(config_location, 'w') as config_file:
        config_file.write(buildout_text)
    os.chmod(config_location, 0o640)

    # Try to find the best possible buildout:
    #  *) if software_root/bin/bootstrap exists use this one to bootstrap
    #     locally
    #  *) as last resort fallback to buildout binary from software_path
    bootstrap_candidate_dir = os.path.abspath(
        os.path.join(self.software_path, 'bin'))
    if os.path.isdir(bootstrap_candidate_dir):
        bootstrap_candidate_list = [
            q for q in os.listdir(bootstrap_candidate_dir)
            if q.startswith('bootstrap')
        ]
    else:
        bootstrap_candidate_list = []

    uid, gid = self.getUserGroupId()
    os.chown(config_location, -1, int(gid))

    if not bootstrap_candidate_list:
        buildout_binary = os.path.join(self.software_path, 'bin', 'buildout')
        self.logger.info("Falling back to default buildout %r"
                         % buildout_binary)
    else:
        if len(bootstrap_candidate_list) != 1:
            raise ValueError('More than one bootstrap candidate found.')
        # Reads uid/gid of path, launches buildout with those privileges
        bootstrap_file = os.path.abspath(
            os.path.join(bootstrap_candidate_dir,
                         bootstrap_candidate_list[0]))

        # When the bootstrap script has a shebang, its interpreter (and
        # arguments) are used to invoke it.
        with open(bootstrap_file, 'r') as bootstrap:
            first_line = bootstrap.readline()
        invocation_list = []
        if first_line.startswith('#!'):
            invocation_list = first_line[2:].split()
        invocation_list.append(bootstrap_file)

        self.logger.debug('Invoking %r in %r'
                          % (' '.join(invocation_list), self.instance_path))
        process_handler = SlapPopen(
            invocation_list,
            preexec_fn=lambda: dropPrivileges(uid, gid, logger=self.logger),
            cwd=self.instance_path,
            env=getCleanEnvironment(logger=self.logger,
                                    home_path=pwd.getpwuid(uid).pw_dir),
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            logger=self.logger)
        if process_handler.returncode is None or \
                process_handler.returncode != 0:
            message = 'Failed to bootstrap buildout in %r.' % (
                self.instance_path)
            self.logger.error(message)
            raise BuildoutFailedError('%s:\n%s\n'
                                      % (message, process_handler.output))
        buildout_binary = os.path.join(self.instance_path, 'sbin', 'buildout')

    if not os.path.exists(buildout_binary):
        # use own buildout generation
        utils.bootstrapBuildout(
            path=self.instance_path,
            buildout=self.buildout,
            logger=self.logger,
            additional_buildout_parameter_list=[
                'buildout:bin-directory=%s'
                % os.path.join(self.instance_path, 'sbin')])
        buildout_binary = os.path.join(self.instance_path, 'sbin', 'buildout')

    # Launches buildout
    utils.launchBuildout(path=self.instance_path,
                         buildout_binary=buildout_binary,
                         logger=self.logger)
    self.generateSupervisorConfigurationFile()
    self.createRetentionLockDelay()