示例#1
0
def main():
    """Run ``benji`` with the script's arguments and push command metrics.

    The command label is the space-joined argument list. The start time is
    recorded before the run. Afterwards either the failed or the succeeded
    status gauge is set to 1, the completion time and runtime are recorded
    and the command registry is pushed.

    Raises:
        Exception: whatever the benji invocation raised, re-raised after the
            failure metrics have been pushed.
        SystemExit: with code 0 after a successful run.
    """
    command = ' '.join(sys.argv[1:])
    start_time = time.time()

    def record_outcome(status_gauge):
        # Flag the outcome first, then stamp completion time and runtime.
        status_gauge.labels(command=command).set(1)
        finished_at = time.time()
        prometheus.command_completion_time.labels(
            command=command).set(finished_at)
        prometheus.command_runtime_seconds.labels(
            command=command).set(finished_at - start_time)
        prometheus.push(prometheus.command_registry)

    prometheus.command_start_time.labels(command=command).set(start_time)
    try:
        # The argument list is built inside the try block so that a failure
        # while assembling it is reported through the failure metrics too.
        utils.subprocess_run(
            ['benji', '--log-level', settings.benji_log_level] + sys.argv[1:])
    except Exception as exception:
        record_outcome(prometheus.command_status_failed)
        raise exception
    else:
        record_outcome(prometheus.command_status_succeeded)
        sys.exit(0)
示例#2
0
def main():
    """Publish counts of stale incomplete and invalid versions, then exit.

    Two ``benji ls`` queries are run with machine-readable output. The length
    of each result's ``versions`` list is stored in the matching Prometheus
    gauge, the version status registry is pushed with an empty grouping key,
    and the process exits with code 0.
    """

    def list_versions(filter_expression):
        # Both queries differ only in the filter expression handed to `ls`.
        return subprocess_run([
            'benji',
            '--machine-output',
            '--log-level',
            settings.benji_log_level,
            'ls',
            filter_expression,
        ],
                              decode_json=True)

    stale_incomplete = list_versions(
        'status == "incomplete" and date < "1 day ago"')
    invalid = list_versions('status == "invalid"')

    prometheus.older_incomplete_versions.set(len(stale_incomplete['versions']))
    prometheus.invalid_versions.set(len(invalid['versions']))
    prometheus.push(prometheus.version_status_registry, grouping_key={})
    sys.exit(0)
示例#3
0
def snapshot_create(*,
                    volume: str,
                    pool: str,
                    image: str,
                    snapshot: str,
                    context: Any = None):
    """Create the RBD snapshot ``pool/image@snapshot`` and emit signals.

    The pre signal is sent first. If ``rbd snap create`` raises, the
    post-error signal is sent with the exception and the exception is NOT
    re-raised; otherwise the post-success signal is sent.

    Args:
        volume: Volume name, only forwarded to the signals.
        pool: RBD pool of the image.
        image: RBD image name.
        snapshot: Name of the snapshot to create.
        context: Opaque value forwarded unchanged to every signal.
    """
    # Every signal carries the same description of the snapshot.
    signal_kwargs = dict(volume=volume,
                         pool=pool,
                         image=image,
                         snapshot=snapshot,
                         context=context)

    signal_snapshot_create_pre.send(SIGNAL_SENDER, **signal_kwargs)
    try:
        subprocess_run(['rbd', 'snap', 'create', f'{pool}/{image}@{snapshot}'],
                       timeout=RBD_SNAP_CREATE_TIMEOUT)
    except Exception as exception:
        signal_snapshot_create_post_error.send(SIGNAL_SENDER,
                                               **signal_kwargs,
                                               exception=exception)
    else:
        signal_snapshot_create_post_success.send(SIGNAL_SENDER,
                                                 **signal_kwargs)
示例#4
0
def backup_differential(*,
                        version_name: str,
                        pool: str,
                        image: str,
                        last_snapshot: str,
                        last_version_uid: int,
                        version_labels: Dict[str, str],
                        context: Any = None) -> Dict[str, str]:
    """Back up an RBD image differentially against its previous snapshot.

    A new timestamped snapshot is created, ``rbd diff`` between
    ``last_snapshot`` and the new snapshot is written to a temporary hints
    file, ``last_snapshot`` is removed and ``benji backup`` is run with the
    hints file and ``last_version_uid`` as base version.

    Args:
        version_name: Name given to the new Benji version.
        pool: RBD pool of the image.
        image: RBD image name.
        last_snapshot: Name of the previous RBD snapshot to diff against;
            it is removed once the diff has been taken.
        last_version_uid: UID of the Benji version used as base version.
        version_labels: Labels added to the new version as ``name=value``.
        context: Opaque value forwarded to ``snapshot_create``.

    Returns:
        The decoded JSON output of the ``benji backup`` call.
    """
    # Adjacent f-string literals are concatenated by the parser. The previous
    # backslash continuation inside a single literal leaked stray double
    # quotes and the indentation of the second line into the log message.
    logger.info(f'Performing differential backup of {version_name}:{pool}/{image} from RBD snapshot '
                f'{last_snapshot} and Benji version V{last_version_uid:09d}.')

    now = datetime.utcnow()
    snapshot = now.strftime(RBD_SNAP_NAME_PREFIX + '%Y-%m-%dT%H:%M:%SZ')

    # NOTE(review): snapshot_create is called with version_name=...; confirm
    # this matches the signature of snapshot_create in this module.
    snapshot_create(version_name=version_name, pool=pool, image=image, snapshot=snapshot, context=context)
    stdout = subprocess_run(
        ['rbd', 'diff', '--whole-object', '--format=json', '--from-snap', last_snapshot, f'{pool}/{image}@{snapshot}'])
    # The previous snapshot is only needed for the diff taken above.
    subprocess_run(['rbd', 'snap', 'rm', f'{pool}/{image}@{last_snapshot}'])

    with NamedTemporaryFile(mode='w+', encoding='utf-8') as rbd_hints:
        rbd_hints.write(stdout)
        # Flush so that benji, which opens the file by name, sees the data.
        rbd_hints.flush()
        benji_args = [
            'benji', '--machine-output', '--log-level', benji_log_level, 'backup', '--snapshot-name', snapshot,
            '--rbd-hints', rbd_hints.name, '--base-version',
            str(last_version_uid)
        ]
        for label_name, label_value in version_labels.items():
            benji_args.extend(['--label', f'{label_name}={label_value}'])
        benji_args.extend([f'{pool}:{pool}/{image}@{snapshot}', version_name])
        result = subprocess_run(benji_args, decode_json=True)
        assert isinstance(result, dict)

    return result
示例#5
0
def backup_initial(*,
                   volume: str,
                   pool: str,
                   namespace: str = '',
                   image: str,
                   version_labels: Dict[str, str],
                   version_uid: Optional[str],
                   source_compare: bool = False,
                   context: Any = None) -> Dict[str, str]:
    """Perform a full backup of an RBD image from a freshly created snapshot.

    A timestamped snapshot is created, the output of ``rbd diff`` for it is
    written to a temporary hints file and ``benji backup`` is run against the
    snapshot. With ``source_compare`` set, ``deep_scrub`` is run afterwards.

    Args:
        volume: Volume name passed to ``benji backup``.
        pool: RBD pool of the image.
        namespace: RBD namespace of the image.
        image: RBD image name.
        version_labels: Labels added to the new version as ``name=value``.
        version_uid: UID for the new version; ``--uid`` is only passed when
            this is not ``None``.
        source_compare: Whether to deep-scrub the version against the source.
        context: Opaque value forwarded to ``snapshot_create``.

    Returns:
        The decoded JSON output of the ``benji backup`` call.
    """
    # The same image coordinates are handed to several helpers below.
    rbd_location = dict(pool=pool, namespace=namespace, image=image)

    snapshot = datetime.utcnow().strftime(RBD_SNAP_NAME_PREFIX +
                                          '%Y-%m-%dT%H:%M:%SZ')
    image_path = _rbd_image_path(**rbd_location)
    snapshot_path = _rbd_image_path(snapshot=snapshot, **rbd_location)
    logger.info(f'Performing initial backup of {volume}:{image_path}')

    snapshot_create(volume=volume,
                    snapshot=snapshot,
                    context=context,
                    **rbd_location)
    stdout = subprocess_run(
        ['rbd', 'diff', '--whole-object', '--format=json', snapshot_path])

    with NamedTemporaryFile(mode='w+', encoding='utf-8') as rbd_hints:
        assert isinstance(stdout, str)
        rbd_hints.write(stdout)
        # Flush so that benji, which opens the file by name, sees the data.
        rbd_hints.flush()

        benji_args = [
            'benji', '--machine-output', '--log-level', benji_log_level,
            'backup'
        ]
        benji_args += ['--snapshot', snapshot, '--rbd-hints', rbd_hints.name]
        if version_uid is not None:
            benji_args += ['--uid', version_uid]
        for label_name, label_value in version_labels.items():
            benji_args += ['--label', f'{label_name}={label_value}']
        benji_args += [f'{pool}:{snapshot_path}', volume]

        result = subprocess_run(benji_args, decode_json=True)
        assert isinstance(result, dict)

    if source_compare:
        # The scrub result is ignored; a failure surfaces as an exception.
        # NOTE(review): version_uid may be None here and is forwarded as-is;
        # confirm whether the UID of the new version should be used instead.
        deep_scrub(snapshot=snapshot, version_uid=version_uid, **rbd_location)

    return result
示例#6
0
def backup_initial(*,
                   volume: str,
                   pool: str,
                   image: str,
                   version_labels: Dict[str, str],
                   version_uid: Optional[str],
                   context: Any = None) -> Dict[str, str]:
    """Perform a full backup of an RBD image from a freshly created snapshot.

    A timestamped snapshot is created, the output of ``rbd diff`` for it is
    written to a temporary hints file and ``benji backup`` is run against the
    snapshot.

    Args:
        volume: Volume name passed to ``benji backup``.
        pool: RBD pool of the image.
        image: RBD image name.
        version_labels: Labels added to the new version as ``name=value``.
        version_uid: UID for the new version; ``--uid`` is only passed when
            this is not ``None``.
        context: Opaque value forwarded to ``snapshot_create``.

    Returns:
        The decoded JSON output of the ``benji backup`` call.
    """
    logger.info(f'Performing initial backup of {volume}:{pool}/{image}')

    snapshot = datetime.utcnow().strftime(RBD_SNAP_NAME_PREFIX +
                                          '%Y-%m-%dT%H:%M:%SZ')

    snapshot_create(volume=volume,
                    pool=pool,
                    image=image,
                    snapshot=snapshot,
                    context=context)
    diff_command = ['rbd', 'diff', '--whole-object', '--format=json']
    diff_command.append(f'{pool}/{image}@{snapshot}')
    stdout = subprocess_run(diff_command)

    with NamedTemporaryFile(mode='w+', encoding='utf-8') as rbd_hints:
        assert isinstance(stdout, str)
        rbd_hints.write(stdout)
        # Flush so that benji, which opens the file by name, sees the data.
        rbd_hints.flush()

        benji_args = [
            'benji', '--machine-output', '--log-level', benji_log_level,
            'backup'
        ]
        benji_args += ['--snapshot', snapshot, '--rbd-hints', rbd_hints.name]
        if version_uid is not None:
            benji_args += ['--uid', version_uid]
        for label_name, label_value in version_labels.items():
            benji_args += ['--label', f'{label_name}={label_value}']
        benji_args += [f'{pool}:{pool}/{image}@{snapshot}', volume]

        result = subprocess_run(benji_args, decode_json=True)
        assert isinstance(result, dict)

    return result
示例#7
0
def snapshot_create(*,
                    volume: str,
                    pool: str,
                    namespace: str = '',
                    image: str,
                    snapshot: str,
                    context: Any = None):
    """Create an RBD snapshot and emit the pre/post signals around it.

    The pre signal is sent first. If ``rbd snap create`` raises, the
    post-error signal is sent with the exception and the exception is NOT
    re-raised; otherwise the post-success signal is sent.

    Args:
        volume: Volume name, only forwarded to the signals.
        pool: RBD pool of the image.
        namespace: RBD namespace of the image.
        image: RBD image name.
        snapshot: Name of the snapshot to create.
        context: Opaque value forwarded unchanged to every signal.
    """
    # Every signal carries the same description of the snapshot.
    signal_kwargs = dict(volume=volume,
                         pool=pool,
                         namespace=namespace,
                         image=image,
                         snapshot=snapshot,
                         context=context)

    signal_snapshot_create_pre.send(SIGNAL_SENDER, **signal_kwargs)
    snapshot_path = _rbd_image_path(pool=pool,
                                    namespace=namespace,
                                    image=image,
                                    snapshot=snapshot)
    try:
        rbd_command = ['rbd', 'snap', 'create', '--no-progress', snapshot_path]
        subprocess_run(rbd_command, timeout=RBD_SNAP_CREATE_TIMEOUT)
    except Exception as exception:
        signal_snapshot_create_post_error.send(SIGNAL_SENDER,
                                               **signal_kwargs,
                                               exception=exception)
    else:
        signal_snapshot_create_post_success.send(SIGNAL_SENDER,
                                                 **signal_kwargs)
示例#8
0
def deep_scrub(*,
               pool: str,
               namespace: str = '',
               image: str,
               snapshot: str,
               version_uid: Optional[str]) -> Dict[str, str]:
    """Run ``benji deep-scrub`` for a version against its RBD source snapshot.

    Args:
        pool: RBD pool of the image.
        namespace: RBD namespace of the image.
        image: RBD image name.
        snapshot: Snapshot of the image used as comparison source.
        version_uid: UID of the version to scrub. It is passed to benji
            unchanged (NOTE(review): a ``None`` value is not handled here).

    Returns:
        The decoded JSON output of the ``benji deep-scrub`` call.
    """
    snapshot_path = _rbd_image_path(pool=pool,
                                    namespace=namespace,
                                    image=image,
                                    snapshot=snapshot)
    logger.info(f'Comparing source {pool}:{snapshot_path} to {version_uid}.')

    scrub_command = [
        'benji', '--machine-output', '--log-level', benji_log_level,
        'deep-scrub'
    ]
    scrub_command += ['--source', f'{pool}:{snapshot_path}', version_uid]

    scrub_result = subprocess_run(scrub_command, decode_json=True)
    assert isinstance(scrub_result, dict)
    return scrub_result
示例#9
0
def main():
    """Restore a Benji version into a Kubernetes persistent volume claim.

    Steps, in order:
      1. Parse the command line (version uid, PVC namespace and name, plus
         the --force, --pvc-storage-class and --restore-url-template options).
      2. Look up the size of the version via ``benji ls``.
      3. Read the PVC; create it when the API answers with status 404,
         otherwise require --force and check that it is large enough.
      4. Poll until the PVC phase is ``Bound``; exit with
         ``os.EX_CANTCREAT`` if it is still not bound after the polls.
      5. Determine the RBD information of the bound PV and run
         ``benji restore`` into the URL built from the template.

    Raises:
        RuntimeError: version not found, unexpected Kubernetes API error,
            PVC exists without --force, PVC too small, or RBD information
            not determinable.
        SystemExit: code 0 on success, ``os.EX_CANTCREAT`` on bind timeout.
    """
    parser = argparse.ArgumentParser(allow_abbrev=False, formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('-f',
                        '--force',
                        action='store_true',
                        dest='force',
                        default=False,
                        help='Overwrite content of existing persistent volumes')
    parser.add_argument('--pvc-storage-class',
                        dest='pvc_storage_class',
                        metavar='pvc_storage_class',
                        default=None,
                        help='PVC storage class (only takes effect if the PVC does not exist yet)')
    parser.add_argument('--restore-url-template',
                        dest='restore_url_template',
                        metavar='restore_url_template',
                        default='rbd:{pool}/{namespace}/{image}',
                        help='Template to use for constructing URL for benji restore call')
    # The positional arguments all use their name as both dest and metavar.
    positionals = (
        ('version_uid', 'Version uid'),
        ('pvc_namespace', 'PVC namespace'),
        ('pvc_name', 'PVC name'),
    )
    for positional_name, positional_help in positionals:
        parser.add_argument(metavar=positional_name, dest=positional_name, help=positional_help)

    args = parser.parse_args()

    benji.k8s_tools.kubernetes.load_config()

    logger.info(f'Restoring version {args.version_uid} to PVC {args.pvc_namespace}/{args.pvc_name}.')

    listing = subprocess_run(
        ['benji', '--machine-output', '--log-level', settings.benji_log_level, 'ls', f'uid == "{args.version_uid}"'],
        decode_json=True)
    assert isinstance(listing, dict)
    assert 'versions' in listing
    versions = listing['versions']
    assert isinstance(versions, list)

    if not versions:
        raise RuntimeError(f'Size of {args.version_uid} could not be determined.')

    version_info = versions[0]
    assert isinstance(version_info, dict)
    version_size = version_info['size']
    assert isinstance(version_size, int)

    # This assumes that the Kubernetes client has already been initialized
    core_v1_api = kubernetes.client.CoreV1Api()
    try:
        pvc = core_v1_api.read_namespaced_persistent_volume_claim(args.pvc_name, args.pvc_namespace)
    except ApiException as exception:
        if exception.status != 404:
            raise RuntimeError(f'Unexpected Kubernetes API exception: {str(exception)}')
        # A 404 means that the PVC does not exist yet.
        pvc = None

    if pvc is not None:
        if not args.force:
            raise RuntimeError('PVC already exists. Will not overwrite it unless forced.')

        # capacity is a plain dict, so the storage quantity is looked up by key.
        pvc_size = int(benji.k8s_tools.kubernetes.parse_quantity(pvc.status.capacity['storage']))
        if pvc_size < version_size:
            raise RuntimeError(f'Existing PVC is too small to hold version {args.version_uid} ({pvc_size} < {version_size}).')
        if pvc_size > version_size:
            logger.warning(f'Existing PVC is {pvc_size - version_size} bytes bigger than version {args.version_uid}.')
    else:
        pvc = benji.k8s_tools.kubernetes.create_pvc(name=args.pvc_name,
                                                    namespace=args.pvc_namespace,
                                                    size=version_size,
                                                    storage_class=args.pvc_storage_class)

    # Re-read the PVC until it is bound or the poll budget is used up.
    completed_polls = 0
    while completed_polls < PVC_CREATION_MAX_POLLS:
        pvc = core_v1_api.read_namespaced_persistent_volume_claim(args.pvc_name, args.pvc_namespace)
        if pvc.status.phase == 'Bound':
            break
        time.sleep(PVC_CREATION_POLL_INTERVAL)
        completed_polls += 1
        logger.info('Waiting for persistent volume creation... %d/%d', completed_polls, PVC_CREATION_MAX_POLLS)

    if pvc.status.phase != 'Bound':
        logger.error('Persistent volume creation did not complete after %d seconds.',
                     PVC_CREATION_MAX_POLLS * PVC_CREATION_POLL_INTERVAL)
        sys.exit(os.EX_CANTCREAT)
    logger.info('Persistent volume creation completed.')

    pv = core_v1_api.read_persistent_volume(pvc.spec.volume_name)
    rbd_info = benji.k8s_tools.kubernetes.determine_rbd_info_from_pv(pv)
    if rbd_info is None:
        raise RuntimeError(f'Unable to determine RBD information for {pv.metadata.name}')

    restore_command = [
        'benji',
        '--machine-output',
        '--log-level',
        settings.benji_log_level,
        'restore',
        '--sparse',
        '--force',
        args.version_uid,
        args.restore_url_template.format(pool=rbd_info.pool, namespace=rbd_info.namespace, image=rbd_info.image),
    ]
    print(subprocess_run(restore_command))
    sys.exit(0)
示例#10
0
def backup(*,
           volume: str,
           pool: str,
           image: str,
           version_labels: Dict[str, str] = None,
           version_uid: str = None,
           context: Any = None):
    """Back up an RBD image, differentially when a usable snapshot exists.

    The pre signal is sent first. Snapshots of the image whose name starts
    with ``RBD_SNAP_NAME_PREFIX`` are listed. With none present an initial
    backup is made. Otherwise all but the newest of them are removed and
    Benji is asked for a valid version of ``volume`` made from the newest
    snapshot: if one exists a differential backup is based on it, if not the
    snapshot is removed and an initial backup is made.

    Any exception is reported through the post-error signal and is NOT
    re-raised; on success the post-success signal is sent with the first
    entry of the ``versions`` list in the backup result.

    Args:
        volume: Volume name used for the Benji version.
        pool: RBD pool of the image.
        image: RBD image name.
        version_labels: Labels for the new version. ``None`` (the default)
            is treated as an empty dict that is created fresh on every call.
        version_uid: Optional UID for the new version.
        context: Opaque value forwarded to the signals and helpers.
    """
    # A literal {} default would be one dict object shared by every call and
    # handed to signal receivers and helpers, so it is created per call.
    if version_labels is None:
        version_labels = {}

    signal_backup_pre.send(SIGNAL_SENDER,
                           volume=volume,
                           pool=pool,
                           image=image,
                           version_labels=version_labels,
                           context=context)
    version = None
    try:
        rbd_snap_ls = subprocess_run(
            ['rbd', 'snap', 'ls', '--format=json', f'{pool}/{image}'],
            decode_json=True)
        assert isinstance(rbd_snap_ls, list)
        # Snapshot are sorted by their ID, so newer snapshots come last
        benjis_snapshots = [
            snapshot['name'] for snapshot in rbd_snap_ls
            if snapshot['name'].startswith(RBD_SNAP_NAME_PREFIX)
        ]
        if not benjis_snapshots:
            logger.info(
                'No previous RBD snapshot found, performing initial backup.')
            result = backup_initial(volume=volume,
                                    pool=pool,
                                    image=image,
                                    version_uid=version_uid,
                                    version_labels=version_labels,
                                    context=context)
        else:
            # Delete all snapshots except the newest
            for snapshot in benjis_snapshots[:-1]:
                logger.info(
                    f'Deleting older RBD snapshot {pool}/{image}@{snapshot}.')
                subprocess_run(
                    ['rbd', 'snap', 'rm', f'{pool}/{image}@{snapshot}'])

            last_snapshot = benjis_snapshots[-1]
            logger.info(
                f'Newest RBD snapshot is {pool}/{image}@{last_snapshot}.')

            # Only a valid version made from the newest snapshot can serve as
            # the base of a differential backup.
            benji_ls = subprocess_run([
                'benji', '--machine-output', '--log-level', benji_log_level,
                'ls',
                f'volume == "{volume}" and snapshot == "{last_snapshot}" and status == "valid"'
            ],
                                      decode_json=True)
            assert isinstance(benji_ls, dict)
            assert 'versions' in benji_ls
            assert isinstance(benji_ls['versions'], list)
            if benji_ls['versions']:
                assert 'uid' in benji_ls['versions'][0]
                last_version_uid = benji_ls['versions'][0]['uid']
                assert isinstance(last_version_uid, str)
                result = backup_differential(volume=volume,
                                             pool=pool,
                                             image=image,
                                             last_snapshot=last_snapshot,
                                             last_version_uid=last_version_uid,
                                             version_uid=version_uid,
                                             version_labels=version_labels,
                                             context=context)
            else:
                logger.info(
                    f'Existing RBD snapshot {pool}/{image}@{last_snapshot} not found in Benji, deleting it and reverting to initial backup.'
                )
                subprocess_run(
                    ['rbd', 'snap', 'rm', f'{pool}/{image}@{last_snapshot}'])
                result = backup_initial(volume=volume,
                                        pool=pool,
                                        image=image,
                                        version_uid=version_uid,
                                        version_labels=version_labels,
                                        context=context)
        assert 'versions' in result and isinstance(result['versions'], list)
        version = result['versions'][0]
    except Exception as exception:
        # Failures are handed to the signal receivers instead of propagating;
        # version is still None unless the backup result was already read.
        signal_backup_post_error.send(SIGNAL_SENDER,
                                      volume=volume,
                                      pool=pool,
                                      image=image,
                                      version_labels=version_labels,
                                      context=context,
                                      version=version,
                                      exception=exception)
    else:
        signal_backup_post_success.send(SIGNAL_SENDER,
                                        volume=volume,
                                        pool=pool,
                                        image=image,
                                        version_labels=version_labels,
                                        context=context,
                                        version=version)