def __init__(self, config, ec2, instance_db, lock):
    """Keep references to the collaborators and build the SNS channel.

    arg: config -> configuration object, also handed to SNSChannel
    arg: ec2 -> ec2 connection object
    arg: instance_db -> store of registered instances
    arg: lock -> lock object
    """
    # The plain attribute assignments are independent of each other; the
    # SNSChannel is built last, from the same config.
    self.lock = lock
    self.instance_db = instance_db
    self.ec2 = ec2
    self.config = config
    self.sns = SNSChannel(config)
class SnapshotRunner(object):
    """Create, tag and trim periodic snapshots of EC2 instance volumes.

    Instances are selected either by an EC2 tag or from the registration
    database. Each snapshot is tagged with ``inst_snap`` (instance id and
    period) and ``inst_dev`` (device name) so that later runs can find the
    previous snapshots of the same instance/period.
    """

    # Maps a period name to the minimum age (timedelta, in days) the most
    # recent snapshot of that period must have before a new one is taken.
    allowed_periods = {
        "daily": timedelta(0.9),
        "weekly": timedelta(6.9),
        "monthly": timedelta(27.5)}

    def __init__(self, config, ec2, instance_db, lock):
        """
        arg: config -> mapping-like config; read via ``.get("<period>-backups")``
                       and also passed to SNSChannel
        arg: ec2 -> ec2 connection (get_all_instances, get_all_snapshots,
                    create_snapshot are used)
        arg: instance_db -> registration store (scan, new_item are used)
        arg: lock -> object whose ``acquire(name)`` result is used as a
                     context manager
        """
        self.config = config
        self.ec2 = ec2
        self.instance_db = instance_db
        self.lock = lock
        self.sns = SNSChannel(config)

    def get_snapshot_instances(self, options):
        """ Get all instances registered for auto snapshots

        Returns an iterable of ``(record, instance)`` pairs. When
        ``options.tag`` is set the instances are selected by tag instead
        of by registration.
        """
        if options.tag:
            return self._get_tagged_instances(options.tag)
        return self._get_registered_instances()

    def _get_tagged_instances(self, tag):
        """Support instance selection for backup based on a tag value.

        arg: tag -> string of the form "name:value"; only the first ":"
                    separates the name from the value.

        Yields ``({}, instance)`` pairs; the empty dict stands in for the
        registration record.
        """
        tag_name, tag_value = tag.split(":", 1)
        reservations = self.ec2.get_all_instances(
            filters={'tag:%s' % tag_name: tag_value})
        for r in reservations:
            for i in r.instances:
                yield ({}, i)

    def _get_registered_instances(self):
        """Support instance backup based on registration.

        Yields ``(record, instance)`` for every record in the instance db
        whose instance could be looked up.
        """
        # hmm.. scan over all apps and all instances, sort of worst case.
        # this could be made more efficient, batching 20 instances at a time.
        for record in self.instance_db.scan():
            # TODO check for non existent instance and mark/dead
            results = self.ec2.get_all_instances([record['instance_id']])
            if not results:
                log.warning(
                    "Could not find registered instance %s",
                    record["instance_id"])
                continue
            for r in results:
                # Takes (and removes) the last instance of the reservation.
                i = r.instances.pop()
                yield (record, i)

    def get_instance_volumes(self, i):
        """Yield ``(volume_id, device_name)`` for the volumes to snapshot.

        Yields nothing (after logging a warning) when the instance has a
        non-ebs root device or too many block devices. Devices without a
        volume id are skipped.
        """
        if i.root_device_type != "ebs":
            log.warning(
                "Not backing up instance: %s/%s non ebs root device",
                i.id, i.tags.get("Name", "NA"))
            return
        devs = i.block_device_mapping.items()
        # Refuse the temptation to guess. If there are multiple volumes
        # attached to an instance, it could be raided/lvm/etc and we need
        # coordination with the instance to get a multi-volume consistent
        # snap.
        # NOTE(review): the threshold below lets instances with two block
        # devices through, while the comment and the warning text speak of
        # "more than one volume" -- confirm which one is intended.
        if len(devs) > 2:
            log.warning(
                "Not backing up instance: %s/%s, more than one volume",
                i.id, i.tags.get("Name", "NA"))
            return
        for dev_name, bdt in devs:
            if not bdt.volume_id:
                continue
            yield bdt.volume_id, dev_name

    def run_period(self, options):
        """ Create backups for the given period for all registered instances.

        Each instance is processed while holding the lock named
        "snapshot-<instance id>".
        """
        period = options.period
        now = datetime.now(tzutc())
        log.info(
            "Creating snapshots for %s on %s",
            period, now.strftime("%Y/%m/%d"))
        for r, i in self.get_snapshot_instances(options):
            with self.lock.acquire("snapshot-%s" % i.id):
                for vol_id, dev in self.get_instance_volumes(i):
                    self._snapshot_instance(r, i, vol_id, dev, now, period)

    def _snapshot_instance(self, r, i, vol_id, dev, now, period):
        """Snapshot one volume of an instance and trim old snapshots.

        arg: r -> record (mapping; 'unit_name' and 'app_id' are consulted)
        arg: i -> boto ec2 instance
        arg: vol_id -> id of the volume to snapshot
        arg: dev -> device name of the volume, stored in the 'inst_dev' tag
        arg: now -> datetime of cur time.
        arg: period -> key of ``allowed_periods``

        Nothing is created when the newest existing snapshot for this
        instance/period is younger than ``allowed_periods[period]``.
        """
        # Get previous snapshots, newest first.
        snapshots = self.ec2.get_all_snapshots(
            filters={'tag:inst_snap': "%s/%s" % (i.id, period)})
        snapshots.sort(
            key=operator.attrgetter('start_time'), reverse=True)

        name = r.get('unit_name') or i.tags.get('Name') or i.id

        # Check if its too soon for a new snapshot from the last
        if snapshots:
            last_snapshot = date_parse(snapshots[0].start_time)
            if now - last_snapshot < self.allowed_periods[period]:
                log.warning(
                    "Skipping %s, last snapshot for %s was %s",
                    name, period, now - last_snapshot)
                return

        # Create new snapshot
        description = "%s %s %s" % (
            name, period.capitalize(), now.strftime("%Y-%m-%d"))
        log.debug("Snapshotting %s on %s as %s", i.id, vol_id, description)
        snapshot = self.ec2.create_snapshot(vol_id, description)
        snapshot.add_tag('Name', description)

        # Copy over instance tags to the snapshot except name.
        for k, v in i.tags.items():
            if k == "Name":
                continue
            snapshot.add_tag(k, v)

        # If the instance was registered with an app id, and the
        # instance doesn't already have one, then copy over the
        # registered one as a tag.
        if 'app_id' in r and 'app_id' not in i.tags:
            snapshot.add_tag('app_id', r['app_id'])

        # Record metadata for restoration and backup system
        snapshot.add_tag('inst_snap', "%s/%s" % (i.id, period))
        snapshot.add_tag('inst_dev', dev)

        # Trim extras
        # NOTE(review): assumes the config provides an integer for
        # "<period>-backups" -- confirm against the config loader.
        backup_count = self.config.get("%s-backups" % period)
        snapshots.insert(0, snapshot)
        if len(snapshots) <= backup_count:
            return

        excess = snapshots[backup_count:]
        log.info(
            "Trimming excess %s snapshots %s max:%d existing:%d",
            period, [s.tags.get('Name') for s in excess],
            backup_count, len(snapshots))

        for s in excess:
            try:
                s.delete()
            except Exception as e:
                ## don't exit if the snapshot can't be deleted.
                # Exceptions are not guaranteed to carry a ``message``
                # attribute (Python 3 has none), so fall back to the
                # exception itself; otherwise this handler would raise
                # and abort the run it is meant to protect.
                reason = getattr(e, 'message', None) or e
                msg = "{}: Trimming {} failed for {}: '{}'".format(
                    now.strftime("%Y-%m-%d %H:%M:%S"), s, i.id, reason)
                log.critical(msg)
                self.sns.notify(msg, "iaas-backup: Error trimming {} "
                                .format(s))

    def _get_instance(self, options):
        """Look up the single instance named by ``options.instance_id``.

        Returns the instance, or None (after logging an error) unless the
        lookup yields exactly one reservation holding exactly one instance.
        """
        reservations = self.ec2.get_all_instances([options.instance_id])
        if len(reservations) != 1 or len(reservations[0].instances) != 1:
            log.error("Invalid instance id %s", options.instance_id)
            return None
        return reservations[0].instances[0]

    def register(self, options):
        """Register an instance for the snapshot system.

        Uses ``options.instance`` when present, otherwise looks the
        instance up by ``options.instance_id``.

        Returns True once the registration item has been saved, None when
        the instance cannot be found or has no volume to snapshot.
        """
        instance = getattr(options, 'instance', None)
        if instance is None:
            instance = self._get_instance(options)
        if instance is None:
            return None
        log.info("Registering snapshot instance")

        # Only register instances that would actually get snapshotted.
        # get_instance_volumes already logs why an instance is refused.
        volumes = list(self.get_instance_volumes(instance))
        if not volumes:
            return None

        item = self.instance_db.new_item(
            options.app_id, instance.id, {
                'record': instance.id,
                'unit_name': (options.unit or "").strip()})
        item.save()
        log.info("Instance %s registered for snapshots", instance.id)
        return True