def main():
    """Run ``benji`` with this script's arguments and push command metrics.

    The command line (everything after the script name) is used as the
    ``command`` label on every metric. The start time is recorded before
    the subprocess is launched; afterwards either the failed or the
    succeeded status metric is set to 1, the completion time and runtime
    are recorded, and the command registry is pushed.

    Raises:
        Exception: whatever the subprocess call raised, re-raised after the
            failure metrics have been pushed.
        SystemExit: with status 0 when the subprocess call succeeds.
    """
    command = ' '.join(sys.argv[1:])
    start_time = time.time()
    prometheus.command_start_time.labels(command=command).set(start_time)

    def record_completion_and_push():
        # Shared epilogue for both outcomes: timestamps first, then push.
        completion_time = time.time()
        prometheus.command_completion_time.labels(command=command).set(completion_time)
        prometheus.command_runtime_seconds.labels(command=command).set(completion_time - start_time)
        prometheus.push(prometheus.command_registry)

    try:
        # The argument list is built inside the try so that any failure while
        # assembling it is reported through the failure metrics as well.
        utils.subprocess_run(['benji', '--log-level', settings.benji_log_level] + sys.argv[1:])
    except Exception as error:
        prometheus.command_status_failed.labels(command=command).set(1)
        record_completion_and_push()
        raise error

    # Only reached when the subprocess call did not raise.
    prometheus.command_status_succeeded.labels(command=command).set(1)
    record_completion_and_push()
    sys.exit(0)
def main():
    """Count problematic versions via ``benji ls`` and push the counts.

    Two queries are run with machine-readable output: versions that are
    still incomplete and older than one day, and versions marked invalid.
    The number of entries under the ``versions`` key of each result is
    stored in the corresponding metric, the version status registry is
    pushed with an empty grouping key, and the process exits with status 0.
    """

    def list_versions(filter_expression):
        # Both queries differ only in the filter expression passed to `ls`.
        benji_command = [
            'benji',
            '--machine-output',
            '--log-level',
            settings.benji_log_level,
            'ls',
            filter_expression,
        ]
        return subprocess_run(benji_command, decode_json=True)

    incomplete_result = list_versions('status == "incomplete" and date < "1 day ago"')
    invalid_result = list_versions('status == "invalid"')

    incomplete_count = len(incomplete_result['versions'])
    invalid_count = len(invalid_result['versions'])
    prometheus.older_incomplete_versions.set(incomplete_count)
    prometheus.invalid_versions.set(invalid_count)

    prometheus.push(prometheus.version_status_registry, grouping_key={})
    sys.exit(0)
def ceph_backup_post_success(sender: str, volume: str, pool: str, namespace: str, image: str,
                             version_labels: Dict[str, str], context: Dict[str, Any],
                             version: Optional[Dict]):
    """Record metrics and emit a PVC event after a successful backup.

    Args:
        sender: Not used by this handler.
        volume: Volume name, used as the ``volume`` label on the metrics.
        pool: Not used by this handler.
        namespace: Not used by this handler.
        image: Not used by this handler.
        version_labels: Not used by this handler.
        context: Must be a dict holding the PVC object under ``'pvc'`` and
            the backup start timestamp under ``'backup-start-time'``.
        version: Must not be ``None``; its ``'uid'`` entry is included in
            the event message.

    Creating the Kubernetes event is best-effort: any exception raised
    while building or sending it is logged and not propagated.
    """
    assert isinstance(context, dict)
    assert version is not None

    pvc_metadata = context['pvc'].metadata
    pvc_namespace = pvc_metadata.namespace
    pvc_name = pvc_metadata.name
    pvc_uid = pvc_metadata.uid

    start_time = context['backup-start-time']
    completion_time = time.time()

    prometheus.backup_completion_time.labels(volume=volume).set(completion_time)
    prometheus.backup_runtime_seconds.labels(volume=volume).set(completion_time - start_time)
    prometheus.backup_status_succeeded.labels(volume=volume).set(1)
    grouping_key = {'pvc_namespace': pvc_namespace, 'pvc_name': pvc_name}
    prometheus.push(prometheus.backup_registry, grouping_key=grouping_key)

    try:
        # The event arguments are assembled inside the try so that a failure
        # while formatting the message is handled like a failed API call.
        event_arguments = dict(
            type='Normal',
            reason='SuccessfulBackup',
            message=f'Backup to {version["uid"]} completed successfully (took {completion_time - start_time:.0f} seconds).',
            pvc_namespace=pvc_namespace,
            pvc_name=pvc_name,
            pvc_uid=pvc_uid,
        )
        benji.k8s_tools.kubernetes.create_pvc_event(**event_arguments)
    except Exception as exception:
        logger.error(f'Creating Kubernetes event for {pvc_namespace}/{pvc_name} failed with a {exception.__class__.__name__} exception: {str(exception)}')
def ceph_backup_post_error(sender: str, volume: str, pool: str, namespace: str, image: str,
                           version_labels: Dict[str, str], context: Dict[str, Any],
                           version: Optional[Dict], exception: Exception):
    """Record metrics and emit a PVC warning event after a failed backup.

    Args:
        sender: Not used by this handler.
        volume: Volume name, used as the ``volume`` label on the metrics.
        pool: Not used by this handler.
        namespace: Not used by this handler.
        image: Not used by this handler.
        version_labels: Not used by this handler.
        context: Must be a dict holding the PVC object under ``'pvc'`` and
            the backup start timestamp under ``'backup-start-time'``.
        version: Not used by this handler.
        exception: The exception that made the backup fail. It is described
            in the event message and always re-raised at the end.

    Raises:
        Exception: always re-raises ``exception``.
    """
    assert isinstance(context, dict)

    pvc_namespace = context['pvc'].metadata.namespace
    pvc_name = context['pvc'].metadata.name
    pvc_uid = context['pvc'].metadata.uid

    start_time = context['backup-start-time']
    completion_time = time.time()

    prometheus.backup_completion_time.labels(volume=volume).set(completion_time)
    prometheus.backup_runtime_seconds.labels(volume=volume).set(completion_time - start_time)
    prometheus.backup_status_failed.labels(volume=volume).set(1)
    prometheus.push(prometheus.backup_registry,
                    grouping_key={'pvc_namespace': pvc_namespace, 'pvc_name': pvc_name})

    # Event creation is best-effort, matching the success handler: a failure
    # here must not replace the original backup exception seen by the caller.
    # The handler variable is deliberately not named `exception`, because
    # `except ... as exception` would unbind the parameter of the same name.
    try:
        benji.k8s_tools.kubernetes.create_pvc_event(
            type='Warning',
            reason='FailedBackup',
            message=f'Backup failed: {exception.__class__.__name__} {str(exception)}',
            pvc_namespace=pvc_namespace,
            pvc_name=pvc_name,
            pvc_uid=pvc_uid)
    except Exception as event_exception:
        logger.error(f'Creating Kubernetes event for {pvc_namespace}/{pvc_name} failed with a {event_exception.__class__.__name__} exception: {str(event_exception)}')

    raise exception
def main():
    """Back up the RBD images behind all matching PersistentVolumeClaims.

    Command-line options select the PVCs: ``-n/--namespace`` restricts the
    listing to one namespace, while ``-l/--selector`` and
    ``--field-selector`` may each be given several times and are joined
    with commas into a single selector string.

    For every PVC that has a volume name, the PersistentVolume is read and
    its pool and image are determined; PVCs for which either is ``None``
    are skipped. Each remaining PVC is backed up via ``ceph.backup`` under
    the volume name ``<namespace>/<pvc name>``. After all backups the
    backup registry is pushed and the process exits with status 0. It also
    exits with status 0 straight away when no PVC matches.
    """
    # This arguments parser tries to mimic kubectl
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter,
                                     allow_abbrev=False)
    parser.add_argument('-n',
                        '--namespace',
                        metavar='namespace',
                        dest='namespace',
                        default=None,
                        help='Filter on namespace')
    parser.add_argument('-l',
                        '--selector',
                        metavar='label-selector',
                        dest='labels',
                        action='append',
                        default=[],
                        help='Filter PVCs on label selector')
    parser.add_argument('--field-selector',
                        metavar='field-selector',
                        dest='fields',
                        action='append',
                        default=[],
                        help='Filter PVCs on field selector')
    args = parser.parse_args()

    benji.k8s_tools.kubernetes.load_config()
    core_v1_api = kubernetes.client.CoreV1Api()

    # Repeated selector options are combined into one comma-separated string.
    labels = ','.join(args.labels)
    fields = ','.join(args.fields)

    if args.namespace is not None:
        logger.info(f'Backing up all PVCs in namespace {args.namespace}.')
    else:
        logger.info('Backing up all PVCs in all namespaces.')
    if labels:
        logger.info(f'Matching label(s) {labels}.')
    if fields:
        logger.info(f'Matching field(s) {fields}.')

    if args.namespace is not None:
        pvcs = core_v1_api.list_namespaced_persistent_volume_claim(args.namespace,
                                                                   watch=False,
                                                                   label_selector=labels,
                                                                   field_selector=fields).items
    else:
        pvcs = core_v1_api.list_persistent_volume_claim_for_all_namespaces(watch=False,
                                                                           label_selector=labels,
                                                                           field_selector=fields).items
    if not pvcs:
        logger.info('No matching PVCs found.')
        sys.exit(0)

    for pvc in pvcs:
        # Skip PVCs that carry no volume name (attribute missing, None or empty).
        volume_name = getattr(pvc.spec, 'volume_name', None)
        if volume_name in (None, ''):
            continue

        pv = core_v1_api.read_persistent_volume(volume_name)
        pool, image = benji.k8s_tools.kubernetes.determine_rbd_image_from_pv(pv)
        # Skip volumes for which no pool/image pair could be determined.
        if pool is None or image is None:
            continue

        volume = f'{pvc.metadata.namespace}/{pvc.metadata.name}'
        # Limit the version_uid to 253 characters so that it is a compatible Kubernetes resource name.
        # 246 characters of prefix + 1 dash + 6 random characters = 253.
        version_uid = '{}-{}'.format(f'{pvc.metadata.namespace}-{pvc.metadata.name}'[:246], _random_string(6))
        version_labels = {
            'benji-backup.me/instance': settings.benji_instance,
            'benji-backup.me/ceph-pool': pool,
            'benji-backup.me/ceph-rbd-image': image,
            'benji-backup.me/k8s-pvc-namespace': pvc.metadata.namespace,
            'benji-backup.me/k8s-pvc': pvc.metadata.name,
            'benji-backup.me/k8s-pv': pv.metadata.name,
        }

        context = {'pvc': pvc}
        ceph.backup(volume=volume,
                    pool=pool,
                    image=image,
                    version_uid=version_uid,
                    version_labels=version_labels,
                    context=context)

    prometheus.push(prometheus.backup_registry)
    sys.exit(0)