def create_image(self, **kwargs):
    if self.times_to_fail is None:
        raise exceptions.BuilderError("Test fail image upload.")
    self.times_failed += 1
    if self.times_failed <= self.times_to_fail:
        raise exceptions.BuilderError("Test fail image upload.")
    else:
        return super(FakeUploadFailCloud, self).create_image(**kwargs)
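# Minimal usage sketch (a hypothetical test helper, not part of the
# original suite; it assumes the fake's constructor accepts
# times_to_fail and that create_image() takes a name keyword): with
# times_to_fail=2, the first two calls raise BuilderError and the
# third falls through to the real implementation.
def _sketch_upload_retry():
    fake = FakeUploadFailCloud(times_to_fail=2)
    for _ in range(2):
        try:
            fake.create_image(name='fake-image')
        except exceptions.BuilderError:
            pass  # expected: still within the failure budget
    return fake.create_image(name='fake-image')  # third call succeeds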
def start(self):
    '''
    Start the builder.

    The builder functionality is encapsulated within threads run
    by the NodePoolBuilder. This starts the needed sub-threads
    which will run forever until we tell them to stop.
    '''
    with self._start_lock:
        if self._running:
            raise exceptions.BuilderError('Cannot start, already running.')

        self._config = self._getAndValidateConfig()
        self._running = True

        builder_id_file = os.path.join(self._config.imagesdir,
                                       "builder_id.txt")
        builder_id = self._getBuilderID(builder_id_file)

        # All worker threads share a single ZooKeeper instance/connection.
        self.zk = zk.ZooKeeper(enable_cache=False)
        self.zk.connect(list(self._config.zookeeper_servers.values()))

        self.log.debug('Starting listener for build jobs')

        # Create build and upload worker objects
        for i in range(self._num_builders):
            w = BuildWorker(i, builder_id,
                            self._config_path, self._secure_path,
                            self.build_interval, self.zk, self.dib_cmd)
            w.start()
            self._build_workers.append(w)

        for i in range(self._num_uploaders):
            w = UploadWorker(i, builder_id,
                             self._config_path, self._secure_path,
                             self.upload_interval, self.zk)
            w.start()
            self._upload_workers.append(w)

        if self.cleanup_interval > 0:
            self._janitor = CleanupWorker(
                0, builder_id, self._config_path, self._secure_path,
                self.cleanup_interval, self.zk)
            self._janitor.start()

        # Wait until all threads are running. Otherwise, we have a race
        # on the worker _running attribute if shutdown() is called
        # before run() actually begins.
        workers = self._build_workers + self._upload_workers
        if self._janitor:
            workers += [self._janitor]
        while not all(x.running for x in workers):
            time.sleep(0)
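# Sketch of the worker-side contract the final wait loop in start()
# relies on (the real worker classes are more involved; this is an
# assumed simplification): run() flips the running flag as its first
# action and a `running` property exposes it, so start() can block
# until every thread has actually entered run() and a subsequent
# shutdown() cannot race on a not-yet-set flag.
import threading
import time


class _SketchWorker(threading.Thread):
    def __init__(self):
        super().__init__()
        self._running = False

    @property
    def running(self):
        return self._running

    def run(self):
        self._running = True  # set before any real work begins
        while self._running:
            time.sleep(0.1)  # stand-in for the real polling loop

    def stop(self):
        self._running = False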
def to_path(self, images_dir, with_extension=True):
    my_path = os.path.join(images_dir, self.image_id)
    if with_extension:
        if self.extension is None:
            raise exceptions.BuilderError(
                'Cannot specify image extension of None')
        my_path += '.' + self.extension

    md5_path = '%s.%s' % (my_path, 'md5')
    md5 = self._checksum(md5_path)
    if md5:
        self.md5_file = md5_path
        self.md5 = md5[0:32]

    sha256_path = '%s.%s' % (my_path, 'sha256')
    sha256 = self._checksum(sha256_path)
    if sha256:
        self.sha256_file = sha256_path
        self.sha256 = sha256[0:64]

    return my_path
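# Usage sketch with hypothetical values (it assumes DibImageFile takes
# an extension keyword): to_path() computes the image path and, as a
# side effect, reads any '<path>.md5' / '<path>.sha256' sidecar files.
# An md5 hex digest is 32 characters and a sha256 digest 64, hence the
# slicing above, which drops the trailing filename from the checksum
# file's contents.
image = DibImageFile('ubuntu-focal-0000000001', extension='qcow2')
path = image.to_path('/opt/nodepool/images')
# path == '/opt/nodepool/images/ubuntu-focal-0000000001.qcow2'
# image.md5 / image.sha256 are populated iff the sidecar files existed.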
def _buildImage(self, build_id, diskimage):
    '''
    Run the external command to build the diskimage.

    :param str build_id: The ID for the build (used in image filename).
    :param diskimage: The diskimage as retrieved from our config file.

    :returns: An ImageBuild object of build-related data.

    :raises: BuilderError if we failed to execute the build command.
    '''
    base = "-".join([diskimage.name, build_id])
    image_file = DibImageFile(base)
    filename = image_file.to_path(self._config.imagesdir, False)

    env = os.environ.copy()
    env['DIB_RELEASE'] = diskimage.release
    env['DIB_IMAGE_NAME'] = diskimage.name
    env['DIB_IMAGE_FILENAME'] = filename

    # Note we use a reference to the nodepool config here so
    # that whenever the config is updated we get up to date
    # values in this thread.
    if self._config.elementsdir:
        env['ELEMENTS_PATH'] = self._config.elementsdir

    # send additional env vars if needed
    for k, v in diskimage.env_vars.items():
        env[k] = v

    img_elements = diskimage.elements
    img_types = ",".join(diskimage.image_types)

    qemu_img_options = ''
    if 'qcow2' in img_types:
        qemu_img_options = DEFAULT_QEMU_IMAGE_COMPAT_OPTIONS

    cmd = ('%s -x -t %s --checksum --no-tmpfs %s -o %s %s' %
           (self.dib_cmd, img_types, qemu_img_options,
            filename, img_elements))

    self._pruneBuildLogs(diskimage.name)
    log_fn = self._getBuildLog(diskimage.name, build_id)

    self.log.info('Running %s' % (cmd,))
    self.log.info('Logging to %s' % (log_fn,))

    start_time = time.monotonic()
    try:
        p = subprocess.Popen(
            shlex.split(cmd),
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            env=env)
    except OSError as e:
        raise exceptions.BuilderError(
            "Failed to exec '%s'. Error: '%s'" % (cmd, e.strerror)
        )

    with open(log_fn, 'wb') as log:
        while True:
            ln = p.stdout.readline()
            log.write(ln)
            log.flush()
            if not ln:
                break

        rc = p.wait()
        m = "Exit code: %s\n" % rc
        log.write(m.encode('utf8'))

    # It's possible the connection to the ZK cluster could have been
    # interrupted during the build. If so, wait for it to return.
    # It could transition directly from SUSPENDED to CONNECTED, or go
    # through the LOST state before CONNECTED.
    did_suspend = False
    while self._zk.suspended or self._zk.lost:
        did_suspend = True
        self.log.info("ZooKeeper suspended during build. Waiting")
        time.sleep(SUSPEND_WAIT_TIME)
    if did_suspend:
        self.log.info("ZooKeeper available. Resuming")

    build_time = time.monotonic() - start_time

    build_data = zk.ImageBuild()
    build_data.builder_id = self._builder_id
    build_data.builder = self._hostname
    build_data.username = diskimage.username

    if self._zk.didLoseConnection:
        self.log.info("ZooKeeper lost while building %s" % diskimage.name)
        self._zk.resetLostFlag()
        build_data.state = zk.FAILED
    elif p.returncode:
        self.log.info(
            "DIB failed creating %s (%s)" % (diskimage.name, p.returncode))
        build_data.state = zk.FAILED
    else:
        self.log.info("DIB image %s is built" % diskimage.name)
        build_data.state = zk.READY
        build_data.formats = list(diskimage.image_types)

        if self._statsd:
            # record stats on the size of each image we create
            for ext in img_types.split(','):
                key = 'nodepool.dib_image_build.%s.%s.size' % (
                    diskimage.name, ext)
                # A bit tricky because these image files may be sparse
                # files; we only want the true size of the file for
                # purposes of watching if we've added too much stuff
                # into the image.  Note that st_blocks is defined as
                # 512-byte blocks by stat(2)
                size = os.stat("%s.%s" % (filename, ext)).st_blocks * 512
                self.log.debug("%s created image %s.%s (size: %d)" %
                               (diskimage.name, filename, ext, size))
                self._statsd.gauge(key, size)

    if self._statsd:
        # report result to statsd
        for ext in img_types.split(','):
            key_base = 'nodepool.dib_image_build.%s.%s' % (
                diskimage.name, ext)
            self._statsd.gauge(key_base + '.rc', rc)
            self._statsd.timing(key_base + '.duration',
                                int(build_time * 1000))

    return build_data
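# Sketch of why the size metric above uses st_blocks rather than
# st_size: DIB images may be sparse, so st_size reports the apparent
# length while st_blocks * 512 is the space actually allocated, which
# is what matters when watching for image growth. The example path is
# hypothetical.
import os


def _allocated_size(path):
    st = os.stat(path)
    # stat(2) defines st_blocks in 512-byte units regardless of the
    # filesystem block size.
    return st.st_blocks * 512


# e.g. _allocated_size('/opt/nodepool/images/ubuntu-focal-0000000001.qcow2')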