def save(self, snapshot, backup_id, cinder):
    """Back up a snapshot's origin volume to the backup store.

    :param snapshot: dict with at least 'origin' (source volume id) and
        'size' (bytes)
    :param backup_id: id of the backup being created
    :param cinder: cinder client, used for progress reporting
    """
    job_stats_path = self._stats_file(snapshot['origin'])
    logger.rename('lunr.storage.helper.backup.save')
    setproctitle("lunr-save: " + backup_id)
    # snapshot['size'] is in bytes; reduce to gigabytes
    size = snapshot['size'] / 1024 / 1024 / 1024
    try:
        op_start = time()
        worker = Worker(snapshot['origin'], conf=self.conf,
                        stats_path=job_stats_path)
    except exc.ClientException as e:
        # Anything other than "manifest not found" is fatal.
        if e.http_status != 404:
            raise
        op_start = time()
        conn = get_conn(self.conf)
        conn.put_container(snapshot['origin'])
        logger.warning("failed to retrieve manifest;"
                       " first time backup for this volume?")
        # TODO: write the block_size on the manifest at create?
        block_count, remainder = divmod(snapshot['size'], BLOCK_SIZE)
        if remainder:
            block_count += 1
        # initial backup is the only time the we need to worry about
        # creating a new manifest for the worker
        worker = Worker(snapshot['origin'],
                        conf=self.conf,
                        manifest=Manifest.blank(block_count),
                        stats_path=job_stats_path)
def create(self, volume_id, size=None, backup_source_volume_id=None,
           backup_id=None, image_id=None, callback=None, lock=None,
           account=None, cinder=None, scrub_callback=None):
    """Create a volume, optionally sourcing it from a glance image.

    Visible portion validates the image (must be ACTIVE, min_disk
    <= 127GB) and allocates a scratch volume for image conversion.

    :raises InvalidImage: on a bad image or any glance failure
    """
    op_start = time()
    size_str = self._get_size_str(size)
    tmp_vol = None
    snet_glance = None
    if image_id:
        mgmt_glance = get_glance_conn(self.conf, tenant_id=account,
                                      glance_urls=self.glance_mgmt_urls)
        snet_glance = get_glance_conn(self.conf, tenant_id=account)
        try:
            glance_start = time()
            image = mgmt_glance.head(image_id)
            logger.info('STAT: glance.head %r. Time: %r' %
                        (image_id, time() - glance_start))
            status = getattr(image, 'status', 'ACTIVE')
            if status.upper() != 'ACTIVE':
                raise InvalidImage("Non-active image status: %s" % status)
            min_disk = getattr(image, 'min_disk', 0)
            if min_disk > 127:
                raise InvalidImage("Image > 127GB: %s" % image_id)
            if min_disk:
                # Oversize the scratch space by a per-image multiplier
                multiplier = self._get_scratch_multiplier(image)
                convert_gbs = int(min_disk * multiplier)
            else:
                convert_gbs = self.convert_gbs
            tmp_vol = self.create_convert_scratch(image, convert_gbs)
        except GlanceError as e:
            logger.warning("Error fetching glance image: %s" % e)
            raise InvalidImage("Error fetching image: %s" % image_id)
def create(self, volume_id, size=None, backup_source_volume_id=None,
           backup_id=None, image_id=None, callback=None, lock=None,
           account=None, cinder=None, scrub_callback=None):
    """Create a volume, optionally sourcing it from a glance image.

    Visible portion validates the image (must be ACTIVE, min_disk
    <= 127GB) and allocates a scratch volume for image conversion.

    :raises InvalidImage: on a bad image or any glance failure
    """
    op_start = time()
    size_str = self._get_size_str(size)
    tmp_vol = None
    if image_id:
        mgmt_glance = get_glance_conn(self.conf, tenant_id=account,
                                      glance_urls=self.glance_mgmt_urls)
        try:
            glance_start = time()
            image = mgmt_glance.head(image_id)
            logger.info('STAT: glance.head %r. Time: %r' %
                        (image_id, time() - glance_start))
            status = getattr(image, 'status', 'ACTIVE')
            if status.upper() != 'ACTIVE':
                raise InvalidImage("Non-active image status: %s" % status)
            min_disk = getattr(image, 'min_disk', 0)
            if min_disk > 127:
                raise InvalidImage("Image > 127GB: %s" % image_id)
            if min_disk:
                # Oversize the scratch space by a per-image multiplier
                multiplier = self._get_scratch_multiplier(image)
                convert_gbs = int(min_disk * multiplier)
            else:
                convert_gbs = self.convert_gbs
            tmp_vol = self.create_convert_scratch(image, convert_gbs)
        except GlanceError as e:
            logger.warning("Error fetching glance image: %s" % e)
            raise InvalidImage("Error fetching image: %s" % image_id)
def progress_callback(percent):
    """Best-effort push of clone progress into cinder volume metadata.

    :param percent: completion percentage as a float
    """
    try:
        if cinder:
            cinder.update_volume_metadata(
                clone_id, {'clone-progress': "%.2f%%" % percent})
    except CinderError as e:
        # Progress metadata is advisory; never fail the clone over it.
        logger.warning(
            "Error updating clone-progress metadata: %s" % e)
def progress_callback(percent):
    """Best-effort push of clone progress into cinder volume metadata.

    :param percent: completion percentage as a float
    """
    try:
        if cinder:
            cinder.update_volume_metadata(
                clone_id, {'clone-progress': "%.2f%%" % percent})
    except CinderError as e:
        # Progress metadata is advisory; never fail the clone over it.
        logger.warning("Error updating clone-progress metadata: %s" % e)
def callback():
    """Finalize the backup: delete the snapshot volume, mark the backup
    AVAILABLE in the API, and (best-effort) report 100% progress."""
    self.helper.volumes.delete(snapshot['id'])
    self.helper.make_api_request('backups', self.id,
                                 data={'status': 'AVAILABLE'})
    if cinder:
        try:
            cinder.snapshot_progress(self.id, "100%")
        except cinderclient.CinderError as e:
            # Progress reporting is advisory; don't fail the backup.
            logger.warning('Error updating snapshot progress: %s' % e)
def head(self, image_id):
    """Fetch image metadata from glance, re-initializing the client and
    retrying on any glance error.

    Retries indefinitely, as before, but iteratively rather than by
    self-recursion, so persistent failures cannot exhaust the stack.

    :param image_id: glance image id
    :returns: the glance image object
    """
    while True:
        try:
            return self.client.images.get(image_id)
        except (glance_exc.BaseException, glance_exc.HTTPException) as e:
            logger.warning(
                "Exception in glance.head, host: %s, id: %s, error: %s"
                % (self.glance_url, image_id, e))
            # Rebuild the client (e.g. fail over to another glance host)
            # and try again.
            self._init_client()
def prune(self, volume, backup_id):
    """Remove `backup_id` from the manifest of `volume`.

    Visible portion loads the manifest; a 404 means the manifest is
    gone, in which case the backup is already considered deleted.
    """
    logger.rename("lunr.storage.helper.backup.prune")
    setproctitle("lunr-prune: " + backup_id)
    try:
        op_start = time()
        worker = Worker(volume["id"], self.conf)
    except exc.ClientException as e:
        # If the manifest doesn't exist, We consider the backup deleted.
        # If anything else happens, we bail.
        if e.http_status != 404:
            raise
        logger.warning("No manifest found pruning volume: %s"
                       % volume["id"])
        return
def prune(self, volume, backup_id):
    """Remove `backup_id` from the manifest of `volume`.

    Visible portion loads the manifest; a 404 means the manifest is
    gone, in which case the backup is already considered deleted.
    """
    logger.rename('lunr.storage.helper.backup.prune')
    setproctitle("lunr-prune: " + backup_id)
    try:
        op_start = time()
        worker = Worker(volume['id'], self.conf)
    except exc.ClientException as e:
        # If the manifest doesn't exist, We consider the backup deleted.
        # If anything else happens, we bail.
        if e.http_status != 404:
            raise
        logger.warning('No manifest found pruning volume: %s'
                       % volume['id'])
        return
def audit(self, volume):
    """Audit the backups of `volume` against the backup store.

    A 404 fetching the manifest means the volume has no backups yet;
    create its container and audit against a blank manifest.
    """
    logger.rename("lunr.storage.helper.backup.audit")
    setproctitle("lunr-audit: " + volume["id"])
    try:
        op_start = time()
        worker = Worker(volume["id"], self.conf)
    except exc.ClientException as e:
        if e.http_status != 404:
            raise
        op_start = time()
        conn = get_conn(self.conf)
        conn.put_container(volume["id"])
        logger.warning("failed to retrieve manifest;"
                       " auditing volume with no backups")
        # creating a blank manifest for the worker
        worker = Worker(volume["id"], conf=self.conf,
                        manifest=Manifest.blank(0))
def run(self, now=None):
    """Restart scrub jobs that have been running longer than `span`.

    :param now: datetime to treat as "now"; defaults to datetime.now()
    """
    now = now or datetime.now()
    try:
        for volume in self.suspects(self.span, now=now).all():
            log.info("Long running scrub '%s' on node '%s'"
                     % (volume.id, volume.node.id))
            url = 'http://%s:%s/volumes/%s' % (volume.node.hostname,
                                               volume.node.port,
                                               volume.id)
            # Make a volume create call to the storage server
            if self.delete(url):
                log.info("Restarted scrub job '%s' on node '%s'"
                         % (volume.id, volume.node.id))
                # Update last_modified so we don't restart next time we run
                volume.last_modified = func.now()
                self._sess.commit()
    except OperationalError:
        # exc_info=True captures the traceback; close the session so a
        # fresh DB connection is used next run.
        logger.warning("DB error", exc_info=True)
        self._sess.close()
def write_raw_image(self, glance, image, path):
    """Download a glance image to `path` as a raw file, retrying the
    whole transfer on chunk-read failures.

    :param glance: glance client exposing get(image_id) -> chunk iterable
    :param image: image object with an `id` attribute
    :param path: destination file path (truncated/created)
    :raises GlanceError: if the initial image fetch fails
    """
    op_start = time()
    with open(path, 'wb') as f:
        # Try until we run out of glances.
        while True:
            try:
                chunks = glance.get(image.id)
            except GlanceError as e:
                logger.warning("Error fetching glance image: %s" % e)
                raise
            try:
                for chunk in chunks:
                    f.write(chunk)
                break
            # Glanceclient doesn't handle socket timeouts for chunk reads.
            except (GlanceError, socket.timeout):
                # Discard any partially written data before retrying;
                # otherwise the re-downloaded image would be appended
                # after the partial copy and corrupt the file.
                f.seek(0)
                f.truncate()
                continue
def call(self, request):
    """Route `request` to its matched action and commit, retrying up to
    three times on MySQL "server has gone away" (error code 2006) with
    exponential backoff.

    :param request: incoming request to dispatch
    :returns: the action's result after a successful commit
    :raises HTTPInternalServerError: on a non-retryable database error

    NOTE(review): this excerpt is truncated — the ``finally:`` body is
    not visible here.
    """
    # Match the Request URL to an action
    action = self.match(request)
    for attempts in range(0, 3):
        try:
            result = action(request)
            self.helper.commit()
            return result
        except OperationalError, e:
            if hasattr(e, 'orig') and e.orig.args[0] == 2006:
                # MySQL server has gone away
                logger.warning("DB connection error attempt #%d"
                               % attempts, exc_info=True)
                # Exponential backoff: 1s, 2s, 4s between attempts
                sleep(2 ** attempts)
                continue
            logger.exception("Database Error: %s" % e)
            raise HTTPInternalServerError("Internal database error")
        finally:
def audit(self, volume):
    """Audit the backups of `volume` against the backup store.

    A 404 fetching the manifest means the volume has no backups yet;
    create its container and audit against a blank manifest.
    """
    logger.rename('lunr.storage.helper.backup.audit')
    setproctitle("lunr-audit: " + volume['id'])
    try:
        op_start = time()
        worker = Worker(volume['id'], self.conf)
    except exc.ClientException as e:
        if e.http_status != 404:
            raise
        op_start = time()
        conn = get_conn(self.conf)
        conn.put_container(volume['id'])
        logger.warning("failed to retrieve manifest;"
                       " auditing volume with no backups")
        # creating a blank manifest for the worker
        worker = Worker(volume['id'], conf=self.conf,
                        manifest=Manifest.blank(0))
def run(self, now=None):
    """Restart prunes (backup deletes) that have run longer than `span`.

    :param now: datetime to treat as "now"; defaults to datetime.now()
    """
    now = now or datetime.now()
    try:
        for backup in self.suspects(self.span, now=now).all():
            log.info("Long running prune '%s' on node '%s'"
                     % (backup.id, backup.volume.node.id))
            if not self.locked(backup):
                # DELETE /volumes/{volume_id}/backups/{backup_id}
                # ?account=account.id
                if self.delete(self.url(backup),
                               account=backup.account.id):
                    log.info("Restarting prune '%s' on node '%s'"
                             % (backup.id, backup.volume.node.id))
                    # Update last_modified so we don't restart next time
                    backup.last_modified = func.now()
                    self._sess.commit()
    except OperationalError:
        # Close the session so a fresh DB connection is used next run.
        logger.warning("DB error", exc_info=True)
        self._sess.close()
def run(self, now=None):
    """Restart scrub jobs that have been running longer than `span`.

    :param now: datetime to treat as "now"; defaults to datetime.now()
    """
    now = now or datetime.now()
    try:
        for volume in self.suspects(self.span, now=now).all():
            log.info("Long running scrub '%s' on node '%s'"
                     % (volume.name, volume.node.id))
            url = 'http://%s:%s/volumes/%s' % (volume.node.hostname,
                                               volume.node.port,
                                               volume.name)
            # Make a volume create call to the storage server
            if self.delete(url):
                log.info("Restarted scrub job '%s' on node '%s'"
                         % (volume.name, volume.node.id))
                # Update last_modified so we don't restart next time we run
                volume.last_modified = func.now()
                self._sess.commit()
    except OperationalError:
        # Close the session so a fresh DB connection is used next run.
        logger.warning("DB error", exc_info=True)
        self._sess.close()
def run(self, now=None):
    """Kick off audits for suspect backups older than `span`.

    :param now: datetime to treat as "now"; defaults to datetime.now()
    """
    now = now or datetime.now()
    try:
        for backup in self.suspects(self.span, now=now).all():
            log.info("Backup audit suspect: '%s' on node '%s'"
                     % (backup.id, backup.volume.node.id))
            if not self.locked(backup):
                # PUT /volumes/{id}/audit
                # ?backup_id=backup_id&account=account_id
                if self.put(self.url(backup, '/audit'),
                            account=backup.account.id,
                            backup_id=backup.id):
                    log.info("Running audit for backup '%s' on node '%s'"
                             % (backup.id, backup.volume.node.id))
                    # Update last_modified so we don't restart next time
                    backup.last_modified = func.now()
                    self._sess.commit()
    except OperationalError:
        # Close the session so a fresh DB connection is used next run.
        logger.warning("DB error", exc_info=True)
        self._sess.close()
def run(self, now=None):
    """Complete detaches for exports stuck longer than `span`.

    :param now: datetime to treat as "now"; defaults to datetime.now()
    """
    now = now or datetime.now()
    # Find all suspect backups that are older than span
    query = self.find(self.span, now=now)
    try:
        # Attempt to clean up detached exports
        for export in query.all():
            log.info("Found stuck detach '%s' on node '%s'"
                     % (export.id, export.volume.node.id))
            # Ask the node if xen initiator is connected to the target
            if not self.connected(export.volume):
                log.info("Asking cinder to complete the detach for '%s'"
                         % (export.id))
                # Tell cinder about the detach
                self.detach(export)
                # Update last_modified so we don't restart next time
                export.last_modified = func.now()
                self._sess.commit()
    except OperationalError:
        # Close the session so a fresh DB connection is used next run.
        logger.warning("DB error", exc_info=True)
        self._sess.close()
def get_registration_exceptions(local_info, node_info):
    """Compare locally gathered registration info against the API
    server's record, returning the mismatched fields.

    :param local_info: dict of locally determined registration values
    :param node_info: dict of values the API server has on record
    NOTE(review): this excerpt is truncated — presumably the function
    returns ``exceptions`` after the loop; confirm in the full source.
    """
    exceptions = {}
    for k, v in local_info.items():
        if 'hostname' in k and node_info[k] != v:
            # Hostname fields: compare against the resolved address of
            # the value the API server holds.
            try:
                node_value = socket.gethostbyname(node_info[k])
            except socket.error:
                # skip hostname translation on failure
                # NOTE(review): if resolution fails, node_value may be
                # left over from a previous iteration (or unbound on the
                # first) — confirm this is intended.
                pass
        else:
            try:
                node_value = node_info[k]
            except KeyError, e:
                logger.error("During registration; missing '%s' key in api "
                             "server response" % k)
                continue
        if node_value != v:
            logger.warning("Invalid '%s' registered "
                           "as %r != %r" % (k, node_value, v))
            exceptions[k] = v
        else:
            logger.info("Verified '%s' registered as '%s'" % (k, v))
def run(self, now=None):
    """Restart restores that have been running longer than `span`.

    :param now: datetime to treat as "now"; defaults to datetime.now()
    """
    now = now or datetime.now()
    try:
        for restore in self.suspects(self.span, now=now).all():
            log.info("Long running restore '%s' on node '%s'"
                     % (restore.id, restore.node.id))
            url = 'http://%s:%s/volumes/%s' % (restore.node.hostname,
                                               restore.node.port,
                                               restore.id)
            # Get the Backup the restore is of
            # NOTE(review): .get() returns None if the backup row is
            # missing — confirm restore_of always resolves.
            backup = self._sess.query(Backup).get(restore.restore_of)
            # Make a restore create call to the storage server
            if self.put(url, backup_source_volume_id=backup.volume_id,
                        backup_id=backup.id, size=backup.size,
                        account=backup.account.id):
                log.info("Restarted restore job '%s' on node '%s'"
                         % (restore.id, restore.node.id))
                # Update last_modified so we don't restart next time
                restore.last_modified = func.now()
                self._sess.commit()
    except OperationalError:
        # Close the session so a fresh DB connection is used next run.
        logger.warning("DB error", exc_info=True)
        self._sess.close()
def run(self, now=None):
    """Restart backups that have been running longer than `span`.

    :param now: datetime to treat as "now"; defaults to datetime.now()
    """
    now = now or datetime.now()
    # Find all suspect backups that are older than span
    query = self.suspects(self.span, now=now)
    try:
        # Attempt to restart all the backups that are suspect
        for backup in query.all():
            log.info("Long running backup '%s' on node '%s'"
                     % (backup.id, backup.volume.node.id))
            # Re-create the timestamp used
            timestamp = int(mktime(backup.created_at.timetuple()))
            # Make a create call to the storage server
            if self.put(self.url(backup), timestamp=timestamp,
                        account=backup.account.id):
                log.info("Restarted backup '%s' on node '%s'"
                         % (backup.id, backup.volume.node.id))
                # Update last_modified so we don't restart next time
                backup.last_modified = func.now()
                self._sess.commit()
    except OperationalError:
        # Close the session so a fresh DB connection is used next run.
        logger.warning("DB error", exc_info=True)
        self._sess.close()
def run(self, now=None):
    """Restart restores that have been running longer than `span`.

    :param now: datetime to treat as "now"; defaults to datetime.now()
    """
    now = now or datetime.now()
    try:
        for restore in self.suspects(self.span, now=now).all():
            log.info("Long running restore '%s' on node '%s'"
                     % (restore.id, restore.node.id))
            url = 'http://%s:%s/volumes/%s' % (
                restore.node.hostname, restore.node.port, restore.id)
            # Get the Backup the restore is of
            # NOTE(review): .get() returns None if the backup row is
            # missing — confirm restore_of always resolves.
            backup = self._sess.query(Backup).get(restore.restore_of)
            # Make a restore create call to the storage server
            if self.put(url, backup_source_volume_id=backup.volume_id,
                        backup_id=backup.id, size=backup.size,
                        account=backup.account.id):
                log.info("Restarted restore job '%s' on node '%s'"
                         % (restore.id, restore.node.id))
                # Update last_modified so we don't restart next time
                restore.last_modified = func.now()
                self._sess.commit()
    except OperationalError:
        # Close the session so a fresh DB connection is used next run.
        logger.warning("DB error", exc_info=True)
        self._sess.close()
# NOTE(review): this chunk begins mid-method (the enclosing def is not
# visible); the leading block is the tail of a prune() implementation.
try:
    op_start = time()
    worker = Worker(volume['id'], self.conf)
except exc.ClientException, e:
    # If the manifest doesn't exist, We consider the backup deleted.
    # If anything else happens, we bail.
    if e.http_status != 404:
        raise
    logger.warning('No manifest found pruning volume: %s' % volume['id'])
    return
try:
    history = worker.delete(backup_id)
except NotFound, e:
    # Backup already absent from the manifest; nothing to prune.
    logger.warning("backup_id: '%s' missing from manifest in prune"
                   % backup_id)
    return
duration = time() - op_start
logger.info('STAT: pruning %r. Time: %r s' % (backup_id, duration))

def delete(self, volume, backup_id, callback=None, lock=None):
    """Run prune() for `backup_id` in a spawned worker process."""
    spawn(lock, self.prune, volume, backup_id, callback=callback,
          skip_fork=self.skip_fork)

def audit(self, volume):
    """Audit the backups of `volume` against the backup store.

    NOTE(review): truncated here — the except handler body is not
    visible in this excerpt.
    """
    logger.rename('lunr.storage.helper.backup.audit')
    setproctitle("lunr-audit: " + volume['id'])
    try:
        op_start = time()
        worker = Worker(volume['id'], self.conf)
    except exc.ClientException, e:
# NOTE(review): this chunk begins mid-method (the enclosing def is not
# visible); the leading block is the tail of a prune() implementation.
try:
    op_start = time()
    worker = Worker(volume['id'], self.conf)
except exc.ClientException, e:
    # If the manifest doesn't exist, We consider the backup deleted.
    # If anything else happens, we bail.
    if e.http_status != 404:
        raise
    logger.warning('No manifest found pruning volume: %s' % volume['id'])
    return
try:
    history = worker.delete(backup_id)
except NotFound, e:
    # Backup already absent from the manifest; nothing to prune.
    logger.warning("backup_id: '%s' missing from manifest in prune"
                   % backup_id)
    return
duration = time() - op_start
logger.info('STAT: pruning %r. Time: %r s' % (backup_id, duration))

def delete(self, volume, backup_id, callback=None, lock=None):
    """Run prune() for `backup_id` in a spawned worker process."""
    spawn(lock, self.prune, volume, backup_id, callback=callback,
          skip_fork=self.skip_fork)

def audit(self, volume):
    """Audit the backups of `volume` against the backup store.

    NOTE(review): truncated here — the excerpt ends after setting the
    process title.
    """
    logger.rename('lunr.storage.helper.backup.audit')
    setproctitle("lunr-audit: " + volume['id'])