def rsync_snapshot(src_region_name, snapshot_id, dst_region_name,
                   src_inst=None, dst_inst=None):
    """Duplicate the snapshot into dst_region.

    src_region_name, dst_region_name
        Amazon region names. Allowed to be contracted, e.g.
        `ap-southeast-1` will be recognized in `ap-south` or even `ap-s`;
    snapshot_id
        snapshot to duplicate;
    src_inst, dst_inst
        will be reused instead of creating new temporary instances.

    You'll need to open port 60000 for replication of encrypted
    instances."""
    src_conn = get_region_conn(src_region_name)
    src_snap = src_conn.get_all_snapshots([snapshot_id])[0]
    dst_conn = get_region_conn(dst_region_name)

    # A whole-disk root device (`/dev/sda`, not `/dev/sda1`) marks an
    # encrypted volume.
    _src_device = get_snap_device(src_snap)
    _src_dev = re.match(r'^/dev/sda$', _src_device)
    if _src_dev:
        encr = True
        logger.info('Found traces of encryption')
    else:
        encr = None

    info = 'Going to transmit {snap.volume_size} GiB {snap} {snap.description}'
    if src_snap.tags.get('Name'):
        info += ' of {name}'
    info += ' from {snap.region} to {dst}'
    logger.info(info.format(snap=src_snap, dst=dst_conn.region,
                            name=src_snap.tags.get('Name')))

    dst_snaps = dst_conn.get_all_snapshots(owner='self')
    dst_snaps = [snp for snp in dst_snaps if snp.status != 'error']
    src_vol = get_snap_vol(src_snap)
    vol_snaps = [snp for snp in dst_snaps if get_snap_vol(snp) == src_vol]
    if vol_snaps:
        dst_snap = sorted(vol_snaps, key=get_snap_time)[-1]
        if get_snap_time(dst_snap) >= get_snap_time(src_snap):
            kwargs = dict(src=src_snap, dst=dst_snap, dst_reg=dst_conn.region)
            logger.info('Stepping over {src} - it\'s not newer than {dst} '
                        '{dst.description} in {dst_reg}'.format(**kwargs))
            return
    else:
        dst_snap = create_empty_snapshot(dst_conn.region, src_snap.volume_size)
    with nested(attach_snapshot(src_snap, inst=src_inst, encr=encr),
                attach_snapshot(dst_snap, inst=dst_inst, encr=encr)) as (
            (src_vol, src_mnt), (dst_vol, dst_mnt)):
        update_snap(src_vol, src_mnt, dst_vol, dst_mnt, encr,
                    delete_old=not vol_snaps)  # Delete only empty snapshots.
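
# A minimal usage sketch for `rsync_snapshot` above. The snapshot id and
# region names are hypothetical placeholders; per the docstring, the
# destination region name may be contracted and is resolved by
# `get_region_conn`.
def _example_rsync_snapshot():
    # 'snap-12345678' stands in for a real snapshot owned in us-east-1.
    rsync_snapshot('us-east-1', 'snap-12345678', 'ap-s')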
def get_relevant_snapshots(
        conn, tag_name=DEFAULT_TAG_NAME, tag_value=DEFAULT_TAG_VALUE,
        native_only=True, filters=None):
    """Return snapshots with a proper description."""
    # Don't use a mutable default argument - the tag filter added below
    # would otherwise leak into subsequent calls.
    filters = dict(filters or {'status': SNAP_STATUSES})
    if tag_name and tag_value:
        filters.update({'tag:{0}'.format(tag_name): tag_value})
    snaps = conn.get_all_snapshots(owner='self', filters=filters)
    is_described = lambda snap: get_snap_vol(snap) and get_snap_time(snap)
    snaps = [snp for snp in snaps if is_described(snp)]
    if native_only:
        is_native = lambda snp, reg: get_descr_attr(snp, 'Region') == reg.name
        snaps = [snp for snp in snaps if is_native(snp, conn.region)]
    return snaps
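
# A hedged usage sketch for `get_relevant_snapshots`: list the properly
# described snapshots in one region. The region name is illustrative; the
# tag name and value fall back to the module-level defaults.
def _example_list_relevant_snapshots():
    conn = get_region_conn('us-east-1')
    for snap in get_relevant_snapshots(conn):
        logger.info('{0} {1}'.format(snap.id, snap.description))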
def rsync_region(src_region_name, dst_region_name, tag_name=None,
                 tag_value=None, native_only=True):
    """Duplicate the latest snapshots with the given tag into dst_region.

    src_region_name, dst_region_name
        the latest snapshot of every volume in src_region will be rsynced
        to dst_region;
    tag_name, tag_value
        snapshots will be filtered by tag. The tag is fetched from the
        config by default and may be configured per region;
    native_only
        sync only snapshots created in src_region_name. True by default."""
    src_conn = get_region_conn(src_region_name)
    dst_conn = get_region_conn(dst_region_name)
    tag_name = tag_name or config.get(src_conn.region.name, 'TAG_NAME')
    tag_value = tag_value or config.get(src_conn.region.name, 'TAG_VALUE')
    filters = {'tag-key': tag_name, 'tag-value': tag_value}
    snaps = src_conn.get_all_snapshots(owner='self', filters=filters)
    snaps = [snp for snp in snaps if snp.status != 'error']
    _is_described = lambda snap: get_snap_vol(snap) and get_snap_time(snap)
    snaps = [snp for snp in snaps if _is_described(snp)]
    if native_only:
        def is_native(snap, region):
            return get_descr_attr(snap, 'Region') == region.name
        snaps = [snp for snp in snaps if is_native(snp, src_conn.region)]
    with nested(create_temp_inst(src_conn.region),
                create_temp_inst(dst_conn.region)) as (src_inst, dst_inst):
        snaps = sorted(snaps, key=get_snap_vol)     # Prepare for grouping.
        for vol, vol_snaps in groupby(snaps, get_snap_vol):
            latest_snap = sorted(vol_snaps, key=get_snap_time)[-1]
            for inst in src_inst, dst_inst:
                logger.debug('Rebooting {0} in {0.region} '
                             'to refresh attachments'.format(inst))
                inst.reboot()
            args = (src_region_name, latest_snap.id, dst_region_name,
                    src_inst, dst_inst)
            try:
                rsync_snapshot(*args)
            except Exception:
                logger.exception('rsync of {1} from {0} to {2} '
                                 'failed'.format(*args))
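
# A usage sketch for `rsync_region`. The region names and tag are
# placeholders; omitting tag_name and tag_value pulls them from the
# per-region config, as the docstring notes.
def _example_rsync_region():
    rsync_region('us-east-1', 'us-west-1', tag_name='Backup',
                 tag_value='On')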
def rsync_snapshot(src_region_name, snapshot_id, dst_region_name,
                   src_inst=None, dst_inst=None, force=False):
    """Duplicate the snapshot into dst_region.

    src_region_name, dst_region_name
        Amazon region names. Allowed to be contracted, e.g.
        `ap-southeast-1` will be recognized in `ap-south` or even `ap-s`;
    snapshot_id
        snapshot to duplicate;
    src_inst, dst_inst
        will be reused instead of creating new temporary instances;
    force
        rsync the snapshot even if a newer version exists.

    You'll need to open port 60000 for replication of encrypted
    instances."""
    src_conn = get_region_conn(src_region_name)
    src_snap = src_conn.get_all_snapshots([snapshot_id])[0]
    dst_conn = get_region_conn(dst_region_name)

    # A whole-disk root device (`/dev/sda`, not `/dev/sda1`) marks an
    # encrypted volume.
    _src_device = get_snap_device(src_snap)
    _src_dev = re.match(r'^/dev/sda$', _src_device)
    if _src_dev:
        encr = True
        logger.info('Found traces of encryption')
    else:
        encr = None

    info = 'Going to transmit {snap.volume_size} GiB {snap} {snap.description}'
    if src_snap.tags.get('Name'):
        info += ' of {name}'
    info += ' from {snap.region} to {dst}'
    logger.info(info.format(snap=src_snap, dst=dst_conn.region,
                            name=src_snap.tags.get('Name')))

    src_vol = get_snap_vol(src_snap)
    dst_snaps = get_relevant_snapshots(dst_conn, native_only=False)
    vol_snaps = [snp for snp in dst_snaps if get_snap_vol(snp) == src_vol]

    def sync_mountpoints(src_snap, src_vol, src_mnt, dst_vol, dst_mnt):
        # Mark the temporary volume with the snapshot's description.
        dst_vol.add_tag(DESCRIPTION_TAG, src_snap.description)
        snaps, vols = get_replicas(src_snap.description, dst_vol.connection)
        if not force and snaps:
            raise ReplicationCollisionError(
                'Stepping over {snap} - it\'s already replicated as {snaps} '
                'in {snaps[0].region}'.format(snap=src_snap, snaps=snaps))
        if not force and len(vols) > 1:
            timeout = src_snap.volume_size / REPLICATION_SPEED
            get_vol_time = lambda vol: parse(vol.create_time)

            def not_outdated(vol, now):
                age = now - get_vol_time(vol)
                return age.days * 24 * 60 * 60 + age.seconds < timeout
            now = datetime.utcnow().replace(tzinfo=tzutc())
            actual_vols = [vol for vol in vols if not_outdated(vol, now)]
            hung_vols = set(vols) - set(actual_vols)
            if len(actual_vols) > 1:
                oldest = sorted(actual_vols, key=get_vol_time)[0]
                if dst_vol.id != oldest.id:
                    raise ReplicationCollisionError(
                        'Stepping over {snap} - it\'s already replicating '
                        'to {vol} in {vol.region}'.format(snap=src_snap,
                                                          vol=oldest))
            if len(hung_vols) > 1:
                logger.warn(
                    'Replication to temporary {vols} created during '
                    'transmitting {snap} to {reg} qualified as hung up. '
                    'Starting new replication process.'.format(
                        snap=src_snap, vols=hung_vols, reg=dst_vol.region))
        update_snap(src_vol, src_mnt, dst_vol, dst_mnt, encr)

    if vol_snaps:
        dst_snap = sorted(vol_snaps, key=get_snap_time)[-1]
        with nested(
                attach_snapshot(src_snap, inst=src_inst, encr=encr),
                attach_snapshot(dst_snap, inst=dst_inst, encr=encr)) as (
                (src_vol, src_mnt), (dst_vol, dst_mnt)):
            sync_mountpoints(src_snap, src_vol, src_mnt, dst_vol, dst_mnt)
    else:
        with nested(
                attach_snapshot(src_snap, inst=src_inst, encr=encr),
                create_tmp_volume(dst_conn.region, src_snap.volume_size)) as (
                (src_vol, src_mnt), (dst_vol, dst_mnt)):
            sync_mountpoints(src_snap, src_vol, src_mnt, dst_vol, dst_mnt)
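
# A hedged sketch of the `force` flag above: ReplicationCollisionError
# signals that a replica already exists or another replication is still
# in flight; retrying with force=True retransmits regardless. The
# snapshot id and region names are placeholders.
def _example_force_rsync():
    try:
        rsync_snapshot('us-east-1', 'snap-12345678', 'eu-west-1')
    except ReplicationCollisionError:
        rsync_snapshot('us-east-1', 'snap-12345678', 'eu-west-1',
                       force=True)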
def _trim_snapshots(region, dry_run=False):
    """Delete snapshots back in time in a logarithmic manner.

    dry_run
        just print the snapshots to be deleted.

    Modified version of `boto.ec2.connection.trim_snapshots
    <http://pypi.python.org/pypi/boto/2.0>`_. Licensed under MIT license
    by Mitch Garnaat, 2011."""
    hourly_backups = config.getint('purge_backups', 'HOURLY_BACKUPS')
    daily_backups = config.getint('purge_backups', 'DAILY_BACKUPS')
    weekly_backups = config.getint('purge_backups', 'WEEKLY_BACKUPS')
    monthly_backups = config.getint('purge_backups', 'MONTHLY_BACKUPS')
    quarterly_backups = config.getint('purge_backups', 'QUARTERLY_BACKUPS')
    yearly_backups = config.getint('purge_backups', 'YEARLY_BACKUPS')

    # Work with UTC time, which is what the snapshot start time is
    # reported in.
    now = datetime.utcnow()
    last_hour = datetime(now.year, now.month, now.day, now.hour)
    last_midnight = datetime(now.year, now.month, now.day)
    last_sunday = (datetime(now.year, now.month, now.day) -
                   timedelta(days=(now.weekday() + 1) % 7))
    # Stay in UTC here as well (the original mixed in local
    # `datetime.now()` for the longer periods).
    last_month = now - relativedelta(months=1)
    last_year = now - relativedelta(years=1)
    other_years = now - relativedelta(years=2)
    start_of_month = datetime(now.year, now.month, 1)

    target_backup_times = []
    # There are no snapshots older than 1/1/2000.
    oldest_snapshot_date = datetime(2000, 1, 1)

    for hour in range(0, hourly_backups):
        target_backup_times.append(last_hour - timedelta(hours=hour))
    for day in range(0, daily_backups):
        target_backup_times.append(last_midnight - timedelta(days=day))
    for week in range(0, weekly_backups):
        target_backup_times.append(last_sunday - timedelta(weeks=week))
    for month in range(0, monthly_backups):
        target_backup_times.append(last_month - relativedelta(months=month))
    for quart in range(0, quarterly_backups):
        # A quarter is three months (the original stepped by four).
        target_backup_times.append(
            last_year - relativedelta(months=3 * quart))
    for year in range(0, yearly_backups):
        target_backup_times.append(other_years - relativedelta(years=year))

    one_day = timedelta(days=1)
    while start_of_month > oldest_snapshot_date:
        # Append the start of the month to the list of snapshot dates
        # to save.
        target_backup_times.append(start_of_month)
        # There's no timedelta setting for one month, so instead decrement
        # the day by one, so we go to the final day of the previous
        # month...
        start_of_month -= one_day
        # ... and then go to the first day of that previous month:
        start_of_month = datetime(start_of_month.year,
                                  start_of_month.month, 1)

    # Drop duplicates and make the oldest date first (the original only
    # reversed the list, which doesn't sort the interleaved periods).
    target_backup_times = sorted(set(target_backup_times))

    # Get all the snapshots, sort them by date and time, and organize
    # them into one array for each volume.
    conn = get_region_conn(region.name)
    all_snapshots = conn.get_all_snapshots(owner='self')
    all_snapshots.sort(key=lambda snap: snap.start_time)    # Oldest first.
    snaps_for_each_volume = {}
    for snap in all_snapshots:
        # The snapshot name and the volume name are the same. The
        # snapshot name is set from the volume name at the time the
        # snapshot is taken.
        volume_name = get_snap_vol(snap)
        if volume_name:
            # Only examine snapshots that have a volume name.
            snaps_for_volume = snaps_for_each_volume.setdefault(
                volume_name, [])
            snaps_for_volume.append(snap)

    # Do a running comparison of snapshot dates to desired time periods,
    # keeping the oldest snapshot in each time period and deleting the
    # rest.
    for volume_name in snaps_for_each_volume:
        snaps = snaps_for_each_volume[volume_name]
        # Never delete the newest snapshot, so remove it from
        # consideration.
        snaps = snaps[:-1]
        time_period_num = 0
        snap_found_for_this_time_period = False
        for snap in snaps:
            check_this_snap = True
            while (check_this_snap and
                   time_period_num < len(target_backup_times)):
                if get_snap_time(snap) < target_backup_times[time_period_num]:
                    # The snap date is before the cutoff date. Figure out
                    # if it's the first snap in this date range and act
                    # accordingly (since both the date ranges and the
                    # snapshots are sorted chronologically, we know this
                    # snapshot isn't in an earlier date range).
                    if snap_found_for_this_time_period:
                        # As long as the snapshot wasn't marked with the
                        # 'preserve_snapshot' tag, delete it.
                        if not snap.tags.get('preserve_snapshot'):
                            if dry_run:
                                logger.info('Dry-trimmed {0} {1} from {2}'
                                            .format(snap, snap.description,
                                                    snap.start_time))
                            else:
                                try:
                                    conn.delete_snapshot(snap.id)
                                except EC2ResponseError as err:
                                    logger.exception(str(err))
                                else:
                                    logger.info('Trimmed {0} {1} from {2}'
                                                .format(snap,
                                                        snap.description,
                                                        snap.start_time))
                        # Go on and look at the next snapshot, leaving the
                        # time period alone.
                    else:
                        # This was the first snapshot found for this time
                        # period. Leave it alone and look at the next
                        # snapshot.
                        snap_found_for_this_time_period = True
                    check_this_snap = False
                else:
                    # The snap is after the cutoff date. Check it against
                    # the next cutoff date.
                    time_period_num += 1
                    snap_found_for_this_time_period = False
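
# A dry-run sketch for `_trim_snapshots`: preview which snapshots the
# logarithmic retention schedule would delete, without deleting anything.
# The region name is illustrative.
def _example_trim_dry_run():
    region = get_region_conn('us-east-1').region
    _trim_snapshots(region, dry_run=True)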