def main():
    """Run a Benji CLI command and report timing/status metrics to Prometheus.

    The command is taken from ``sys.argv[1:]`` and passed through to ``benji``
    unchanged (with the configured log level). Start time, completion time,
    runtime and a success/failure status gauge are recorded and pushed to the
    Prometheus push gateway in both outcomes. On success the process exits
    with status 0; on failure the original exception is re-raised.
    """
    command = ' '.join(sys.argv[1:])
    start_time = time.time()
    prometheus.command_start_time.labels(command=command).set(start_time)

    def _record_completion() -> None:
        # Shared bookkeeping for both the success and the failure path.
        completion_time = time.time()
        prometheus.command_completion_time.labels(command=command).set(completion_time)
        prometheus.command_runtime_seconds.labels(command=command).set(completion_time - start_time)
        prometheus.push(prometheus.command_registry)

    try:
        utils.subprocess_run(['benji', '--log-level', settings.benji_log_level] + sys.argv[1:])
    except Exception:
        prometheus.command_status_failed.labels(command=command).set(1)
        _record_completion()
        # Bare raise preserves the original exception and traceback intact.
        raise
    else:
        prometheus.command_status_succeeded.labels(command=command).set(1)
        _record_completion()
        sys.exit(0)
def main():
    """Push Benji version-health gauges to Prometheus and exit.

    Counts versions that are still ``incomplete`` after more than a day and
    versions marked ``invalid``, sets the corresponding gauges, and pushes
    the version-status registry to the push gateway.
    """
    ls_base = ['benji', '--machine-output', '--log-level', settings.benji_log_level, 'ls']

    stale = subprocess_run(ls_base + ['status == "incomplete" and date < "1 day ago"'], decode_json=True)
    broken = subprocess_run(ls_base + ['status == "invalid"'], decode_json=True)

    prometheus.older_incomplete_versions.set(len(stale['versions']))
    prometheus.invalid_versions.set(len(broken['versions']))
    prometheus.push(prometheus.version_status_registry, grouping_key={})
    sys.exit(0)
def snapshot_create(*, volume: str, pool: str, image: str, snapshot: str, context: Any = None):
    """Create an RBD snapshot of ``pool/image``, wrapped in blinker signals.

    Sends ``signal_snapshot_create_pre`` before the attempt, then either
    ``signal_snapshot_create_post_error`` (with the exception) or
    ``signal_snapshot_create_post_success`` afterwards. Failures are reported
    via the error signal rather than raised to the caller.
    """
    common_kwargs = dict(volume=volume, pool=pool, image=image, snapshot=snapshot, context=context)
    signal_snapshot_create_pre.send(SIGNAL_SENDER, **common_kwargs)
    try:
        subprocess_run(['rbd', 'snap', 'create', f'{pool}/{image}@{snapshot}'],
                       timeout=RBD_SNAP_CREATE_TIMEOUT)
    except Exception as exception:
        signal_snapshot_create_post_error.send(SIGNAL_SENDER, exception=exception, **common_kwargs)
    else:
        signal_snapshot_create_post_success.send(SIGNAL_SENDER, **common_kwargs)
def backup_differential(*, version_name: str, pool: str, image: str, last_snapshot: str, last_version_uid: int,
                        version_labels: Dict[str, str], context: Any = None) -> Dict[str, str]:
    """Perform a differential Benji backup of ``pool/image``.

    Creates a new RBD snapshot, diffs it against ``last_snapshot``, removes the
    old snapshot, and runs ``benji backup`` with the diff as an RBD hints file
    and ``last_version_uid`` as the base version.

    Returns the JSON-decoded machine output of the ``benji backup`` call.
    """
    # Bug fix: the previous message embedded a mangled line continuation
    # (a literal `" \ "`) in the middle of the logged text.
    logger.info(f'Performing differential backup of {version_name}:{pool}/{image} from RBD snapshot '
                f'{last_snapshot} and Benji version V{last_version_uid:09d}.')
    now = datetime.utcnow()
    snapshot = now.strftime(RBD_SNAP_NAME_PREFIX + '%Y-%m-%dT%H:%M:%SZ')
    snapshot_create(version_name=version_name, pool=pool, image=image, snapshot=snapshot, context=context)
    # Only the blocks changed since the previous snapshot need to be read.
    stdout = subprocess_run(['rbd', 'diff', '--whole-object', '--format=json', '--from-snap', last_snapshot,
                             f'{pool}/{image}@{snapshot}'])
    # The previous snapshot is no longer needed once the diff has been taken.
    subprocess_run(['rbd', 'snap', 'rm', f'{pool}/{image}@{last_snapshot}'])
    with NamedTemporaryFile(mode='w+', encoding='utf-8') as rbd_hints:
        rbd_hints.write(stdout)
        rbd_hints.flush()
        benji_args = [
            'benji', '--machine-output', '--log-level', benji_log_level, 'backup', '--snapshot-name', snapshot,
            '--rbd-hints', rbd_hints.name, '--base-version',
            str(last_version_uid)
        ]
        for label_name, label_value in version_labels.items():
            benji_args.extend(['--label', f'{label_name}={label_value}'])
        benji_args.extend([f'{pool}:{pool}/{image}@{snapshot}', version_name])
        result = subprocess_run(benji_args, decode_json=True)
    assert isinstance(result, dict)
    return result
def backup_initial(*, volume: str, pool: str, namespace: str = '', image: str, version_labels: Dict[str, str],
                   version_uid: Optional[str], source_compare: bool = False, context: Any = None) -> Dict[str, str]:
    """Take a fresh RBD snapshot and run a full (initial) Benji backup from it.

    The ``rbd diff`` of the new snapshot is passed to ``benji backup`` as an
    RBD hints file. When ``source_compare`` is set, the freshly backed up
    version is deep-scrubbed against its source snapshot afterwards.

    Returns the JSON-decoded machine output of the ``benji backup`` call.
    """
    timestamp = datetime.utcnow()
    snapshot = timestamp.strftime(RBD_SNAP_NAME_PREFIX + '%Y-%m-%dT%H:%M:%SZ')
    image_path = _rbd_image_path(pool=pool, namespace=namespace, image=image)
    snapshot_path = _rbd_image_path(pool=pool, namespace=namespace, image=image, snapshot=snapshot)
    logger.info(f'Performing initial backup of {volume}:{image_path}')

    snapshot_create(volume=volume, pool=pool, namespace=namespace, image=image, snapshot=snapshot, context=context)
    diff_output = subprocess_run(['rbd', 'diff', '--whole-object', '--format=json', snapshot_path])

    with NamedTemporaryFile(mode='w+', encoding='utf-8') as hints_file:
        assert isinstance(diff_output, str)
        hints_file.write(diff_output)
        hints_file.flush()
        command = ['benji', '--machine-output', '--log-level', benji_log_level, 'backup', '--snapshot', snapshot,
                   '--rbd-hints', hints_file.name]
        if version_uid is not None:
            command += ['--uid', version_uid]
        for key, value in version_labels.items():
            command += ['--label', f'{key}={value}']
        command += [f'{pool}:{snapshot_path}', volume]
        result = subprocess_run(command, decode_json=True)

    assert isinstance(result, dict)
    if source_compare:
        # The returned result is ignored; any mismatch surfaces as an exception.
        # NOTE(review): this scrubs the passed-in version_uid (possibly None),
        # not the uid reported in `result` — confirm that is intended.
        deep_scrub(pool=pool, namespace=namespace, image=image, snapshot=snapshot, version_uid=version_uid)
    return result
def backup_initial(*, volume: str, pool: str, image: str, version_labels: Dict[str, str],
                   version_uid: Optional[str], context: Any = None) -> Dict[str, str]:
    """Take a fresh RBD snapshot of ``pool/image`` and run an initial Benji backup.

    The snapshot's ``rbd diff`` output is handed to ``benji backup`` as an
    RBD hints file so only allocated extents are read.

    Returns the JSON-decoded machine output of the ``benji backup`` call.
    """
    logger.info(f'Performing initial backup of {volume}:{pool}/{image}')
    snapshot = datetime.utcnow().strftime(RBD_SNAP_NAME_PREFIX + '%Y-%m-%dT%H:%M:%SZ')
    snapshot_spec = f'{pool}/{image}@{snapshot}'

    snapshot_create(volume=volume, pool=pool, image=image, snapshot=snapshot, context=context)
    diff_output = subprocess_run(['rbd', 'diff', '--whole-object', '--format=json', snapshot_spec])

    with NamedTemporaryFile(mode='w+', encoding='utf-8') as hints_file:
        assert isinstance(diff_output, str)
        hints_file.write(diff_output)
        hints_file.flush()
        command = ['benji', '--machine-output', '--log-level', benji_log_level, 'backup', '--snapshot', snapshot,
                   '--rbd-hints', hints_file.name]
        if version_uid is not None:
            command += ['--uid', version_uid]
        for key, value in version_labels.items():
            command += ['--label', f'{key}={value}']
        command += [f'{pool}:{snapshot_spec}', volume]
        result = subprocess_run(command, decode_json=True)

    assert isinstance(result, dict)
    return result
def snapshot_create(*, volume: str, pool: str, namespace: str = '', image: str, snapshot: str, context: Any = None):
    """Create an RBD snapshot (namespace-aware), wrapped in blinker signals.

    Sends ``signal_snapshot_create_pre`` before the attempt, then either
    ``signal_snapshot_create_post_error`` (with the exception) or
    ``signal_snapshot_create_post_success`` afterwards. Failures are reported
    via the error signal rather than raised to the caller.
    """
    common_kwargs = dict(volume=volume, pool=pool, namespace=namespace, image=image, snapshot=snapshot,
                         context=context)
    signal_snapshot_create_pre.send(SIGNAL_SENDER, **common_kwargs)
    snapshot_path = _rbd_image_path(pool=pool, namespace=namespace, image=image, snapshot=snapshot)
    try:
        subprocess_run(['rbd', 'snap', 'create', '--no-progress', snapshot_path],
                       timeout=RBD_SNAP_CREATE_TIMEOUT)
    except Exception as exception:
        signal_snapshot_create_post_error.send(SIGNAL_SENDER, exception=exception, **common_kwargs)
    else:
        signal_snapshot_create_post_success.send(SIGNAL_SENDER, **common_kwargs)
def deep_scrub(*, pool: str, namespace: str = '', image: str, snapshot: str,
               version_uid: Optional[str]) -> Dict[str, str]:
    """Deep-scrub a Benji version against its source RBD snapshot.

    Runs ``benji deep-scrub`` with the snapshot as the ``--source`` so the
    stored version is compared block-by-block to the original data.

    Returns the JSON-decoded machine output of the ``benji deep-scrub`` call.
    """
    snapshot_path = _rbd_image_path(pool=pool, namespace=namespace, image=image, snapshot=snapshot)
    logger.info(f'Comparing source {pool}:{snapshot_path} to {version_uid}.')
    command = [
        'benji', '--machine-output', '--log-level', benji_log_level,
        'deep-scrub', '--source', f'{pool}:{snapshot_path}',
        version_uid,
    ]
    result = subprocess_run(command, decode_json=True)
    assert isinstance(result, dict)
    return result
def main():
    """Restore a Benji version into a Kubernetes PVC, creating the PVC if it does not exist."""
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, allow_abbrev=False)
    parser.add_argument('-f', '--force', dest='force', action='store_true', default=False, help='Overwrite content of existing persistent volumes')
    parser.add_argument('--pvc-storage-class', metavar='pvc_storage_class', dest='pvc_storage_class', default=None, help='PVC storage class (only takes effect if the PVC does not exist yet)')
    parser.add_argument('--restore-url-template', metavar='restore_url_template', dest='restore_url_template', help='Template to use for constructing URL for benji restore call', default='rbd:{pool}/{namespace}/{image}')
    parser.add_argument(metavar='version_uid', dest='version_uid', help='Version uid')
    parser.add_argument(metavar='pvc_namespace', dest='pvc_namespace', help='PVC namespace')
    parser.add_argument(metavar='pvc_name', dest='pvc_name', help='PVC name')
    args = parser.parse_args()
    benji.k8s_tools.kubernetes.load_config()
    logger.info(f'Restoring version {args.version_uid} to PVC {args.pvc_namespace}/{args.pvc_name}.')
    # Look the version up via `benji ls` to learn its size, which is needed to
    # create (or validate) a PVC that can hold it.
    benji_ls = subprocess_run(
        ['benji', '--machine-output', '--log-level', settings.benji_log_level, 'ls', f'uid == "{args.version_uid}"'],
        decode_json=True)
    assert isinstance(benji_ls, dict)
    assert 'versions' in benji_ls
    assert isinstance(benji_ls['versions'], list)
    if len(benji_ls['versions']) == 0:
        raise RuntimeError(f'Size of {args.version_uid} could not be determined.')
    assert isinstance(benji_ls['versions'][0], dict)
    assert isinstance(benji_ls['versions'][0]['size'], int)
    version_size = benji_ls['versions'][0]['size']
    # This assumes that the Kubernetes client has already been initialized
    core_v1_api = kubernetes.client.CoreV1Api()
    pvc = None
    try:
        pvc = core_v1_api.read_namespaced_persistent_volume_claim(args.pvc_name, args.pvc_namespace)
    except ApiException as exception:
        # A 404 simply means the PVC does not exist yet; anything else is fatal.
        if exception.status != 404:
            raise RuntimeError(f'Unexpected Kubernetes API exception: {str(exception)}')
    if pvc is None:
        pvc = benji.k8s_tools.kubernetes.create_pvc(name=args.pvc_name, namespace=args.pvc_namespace, size=version_size, storage_class=args.pvc_storage_class)
    else:
        # Refuse to clobber an existing PVC unless --force was given.
        if not args.force:
            raise RuntimeError('PVC already exists. Will not overwrite it unless forced.')
        # I don't really understand why capacity is a regular dict and not an object. Oh, well.
        pvc_size = int(benji.k8s_tools.kubernetes.parse_quantity(pvc.status.capacity['storage']))
        if pvc_size < version_size:
            raise RuntimeError(f'Existing PVC is too small to hold version {args.version_uid} ({pvc_size} < {version_size}).')
        elif pvc_size > version_size:
            logger.warning(f'Existing PVC is {pvc_size - version_size} bytes bigger than version {args.version_uid}.')
    # Poll until the PVC is bound to a persistent volume (or we give up).
    polls = 0
    while polls < PVC_CREATION_MAX_POLLS:
        pvc = core_v1_api.read_namespaced_persistent_volume_claim(args.pvc_name, args.pvc_namespace)
        if pvc.status.phase == 'Bound':
            break
        time.sleep(PVC_CREATION_POLL_INTERVAL)
        polls += 1
        logger.info('Waiting for persistent volume creation... %d/%d', polls, PVC_CREATION_MAX_POLLS)
    if pvc.status.phase == 'Bound':
        logger.info('Persistent volume creation completed.')
    else:
        logger.error('Persistent volume creation did not complete after %d seconds.', PVC_CREATION_MAX_POLLS * PVC_CREATION_POLL_INTERVAL)
        sys.exit(os.EX_CANTCREAT)
    # Resolve the bound PV to its backing RBD image and restore into it.
    pv = core_v1_api.read_persistent_volume(pvc.spec.volume_name)
    rbd_info = benji.k8s_tools.kubernetes.determine_rbd_info_from_pv(pv)
    if rbd_info is None:
        raise RuntimeError(f'Unable to determine RBD information for {pv.metadata.name}')
    print(
        subprocess_run([
            'benji', '--machine-output', '--log-level', settings.benji_log_level, 'restore', '--sparse', '--force',
            args.version_uid,
            args.restore_url_template.format(pool=rbd_info.pool, namespace=rbd_info.namespace, image=rbd_info.image),
        ]))
    sys.exit(0)
def backup(*, volume: str, pool: str, image: str, version_labels: Dict[str, str] = None,
           version_uid: str = None, context: Any = None):
    """Back up an RBD image with Benji, differentially when possible.

    Looks for existing Benji-prefixed RBD snapshots: with none present an
    initial backup is taken; otherwise older snapshots are pruned and, if the
    newest snapshot has a matching valid Benji version, a differential backup
    against it is performed (falling back to an initial backup when not).
    Pre/post blinker signals are sent around the whole operation; failures are
    reported via ``signal_backup_post_error`` rather than raised.
    """
    # Bug fix: the default used to be a mutable `{}`, which is shared across
    # all calls of the function. Normalize a None default instead.
    if version_labels is None:
        version_labels = {}
    signal_backup_pre.send(SIGNAL_SENDER,
                           volume=volume,
                           pool=pool,
                           image=image,
                           version_labels=version_labels,
                           context=context)
    version = None
    try:
        rbd_snap_ls = subprocess_run(['rbd', 'snap', 'ls', '--format=json', f'{pool}/{image}'], decode_json=True)
        assert isinstance(rbd_snap_ls, list)
        # Snapshot are sorted by their ID, so newer snapshots come last
        benjis_snapshots = [
            snapshot['name'] for snapshot in rbd_snap_ls if snapshot['name'].startswith(RBD_SNAP_NAME_PREFIX)
        ]
        if len(benjis_snapshots) == 0:
            logger.info('No previous RBD snapshot found, performing initial backup.')
            result = backup_initial(volume=volume,
                                    pool=pool,
                                    image=image,
                                    version_uid=version_uid,
                                    version_labels=version_labels,
                                    context=context)
        else:
            # Delete all snapshots except the newest
            for snapshot in benjis_snapshots[:-1]:
                logger.info(f'Deleting older RBD snapshot {pool}/{image}@{snapshot}.')
                subprocess_run(['rbd', 'snap', 'rm', f'{pool}/{image}@{snapshot}'])
            last_snapshot = benjis_snapshots[-1]
            logger.info(f'Newest RBD snapshot is {pool}/{image}@{last_snapshot}.')
            # A differential backup requires a valid Benji version that matches
            # the newest snapshot to serve as the base version.
            benji_ls = subprocess_run([
                'benji', '--machine-output', '--log-level', benji_log_level, 'ls',
                f'volume == "{volume}" and snapshot == "{last_snapshot}" and status == "valid"'
            ],
                                      decode_json=True)
            assert isinstance(benji_ls, dict)
            assert 'versions' in benji_ls
            assert isinstance(benji_ls['versions'], list)
            if len(benji_ls['versions']) > 0:
                assert 'uid' in benji_ls['versions'][0]
                last_version_uid = benji_ls['versions'][0]['uid']
                assert isinstance(last_version_uid, str)
                result = backup_differential(volume=volume,
                                             pool=pool,
                                             image=image,
                                             last_snapshot=last_snapshot,
                                             last_version_uid=last_version_uid,
                                             version_uid=version_uid,
                                             version_labels=version_labels,
                                             context=context)
            else:
                # No usable base version: discard the stale snapshot and start over.
                logger.info(f'Existing RBD snapshot {pool}/{image}@{last_snapshot} not found in Benji, '
                            f'deleting it and reverting to initial backup.')
                subprocess_run(['rbd', 'snap', 'rm', f'{pool}/{image}@{last_snapshot}'])
                result = backup_initial(volume=volume,
                                        pool=pool,
                                        image=image,
                                        version_uid=version_uid,
                                        version_labels=version_labels,
                                        context=context)
        assert 'versions' in result and isinstance(result['versions'], list)
        version = result['versions'][0]
    except Exception as exception:
        signal_backup_post_error.send(SIGNAL_SENDER,
                                      volume=volume,
                                      pool=pool,
                                      image=image,
                                      version_labels=version_labels,
                                      context=context,
                                      version=version,
                                      exception=exception)
    else:
        signal_backup_post_success.send(SIGNAL_SENDER,
                                        volume=volume,
                                        pool=pool,
                                        image=image,
                                        version_labels=version_labels,
                                        context=context,
                                        version=version)