def subparser(subparsers):
    '''Register the 'stop' subcommand and its 'memstore'/'bluestore' subsubcommands.

    Args:
        subparsers: argparse subparsers action to register parsers on.

    Returns:
        list: the created parsers, in order [stop, memstore, bluestore].'''
    stopparser = subparsers.add_parser('stop', help='Stop RADOS-Ceph on a cluster.')
    stopparser.add_argument('--admin', metavar='id', dest='admin_id', type=int,
                            default=None, help='ID of the Ceph admin node.')
    default_mountpoint = start_defaults.mountpoint_path()
    stopparser.add_argument('--mountpoint', metavar='path', type=str,
                            default=default_mountpoint,
                            help='Mountpoint for CephFS on all nodes (default={}).'.format(default_mountpoint))
    stopparser.add_argument('--silent', action='store_true',
                            help='If set, less output is shown.')

    subsubparsers = stopparser.add_subparsers(help='Subsubcommands', dest='subcommand')
    memstoreparser = subsubparsers.add_parser('memstore', help='Stop a memstore cluster.')
    bluestoreparser = subsubparsers.add_parser('bluestore', help='Stop a bluestore cluster.')
    return [stopparser, memstoreparser, bluestoreparser]
def subparser(subparsers):
    '''Register the 'deploy' subcommand for deploying data on a RADOS-Ceph cluster.

    Args:
        subparsers: argparse subparsers action to register parsers on.

    Returns:
        list: the created parsers.'''
    deployparser = subparsers.add_parser('deploy',
                                         help='Deploy data on a RADOS-Ceph cluster.')
    deployparser.add_argument('paths', metavar='path', type=str, nargs='+',
                              help='Data path(s) to deploy on the remote cluster.')
    deployparser.add_argument('--admin', metavar='id', dest='admin_id', type=int,
                              default=None, help='ID of the Ceph admin node.')
    default_mountpoint = start_defaults.mountpoint_path()
    deployparser.add_argument('--mountpoint', metavar='path', type=str,
                              default=default_mountpoint,
                              help='Mountpoint for CephFS on all nodes (default={}).'.format(default_mountpoint))
    default_stripe = defaults.stripe()
    deployparser.add_argument('--stripe', metavar='amount', type=int,
                              default=default_stripe,
                              help='Striping, in megabytes (default={}MB). Must be a multiple of 4. Make sure that every file is smaller than set stripe size.'.format(default_stripe))
    deployparser.add_argument('--copy-multiplier', metavar='amount',
                              dest='copy_multiplier', type=int, default=1,
                              help='Copy multiplier (default=1). Every file will be copied "amount"-1 times on the remote, to make the data look "amount" times larger. This multiplier is applied first.')
    deployparser.add_argument('--link-multiplier', metavar='amount',
                              dest='link_multiplier', type=int, default=1,
                              help='Link multiplier (default=1). Every file will receive "amount"-1 hardlinks on the remote, to make the data look "amount" times larger. This multiplier is applied second. Note that we first apply the copy multiplier, meaning: the link multiplier is applied on copies of files, and the dataset inflation stacks.')
    deployparser.add_argument('--silent', action='store_true',
                              help='If set, less boot output is shown.')
    return [deployparser]
# --- Example #3 (scraped-example separator; score: 0) ---
def subparser(subparsers):
    '''Register the 'deploy' subcommand for deploying data generators on a RADOS-Ceph cluster.

    Args:
        subparsers: argparse subparsers action to register parsers on.

    Returns:
        list: the created parsers.'''
    deployparser = subparsers.add_parser(
        'deploy', help='Deploy data generators on a RADOS-Ceph cluster.')
    # Bugfix: this argument was registered on an undefined name 'submitparser';
    # it belongs on 'deployparser', like every other argument in this function.
    deployparser.add_argument(
        'cmd',
        metavar='cmd',
        type=str,
        help=
        'Command to execute on the remote cluster. Note: $JAVA_HOME/bin/java is available for java applications. python3 is available for python applications. If you need to use flags in the command with "-" signs, use e.g. "-- -h" to ignore "-" signs for the rest of the command.'
    )
    deployparser.add_argument('--admin',
                              metavar='id',
                              dest='admin_id',
                              type=int,
                              default=None,
                              help='ID of the Ceph admin node.')
    deployparser.add_argument(
        '--paths',
        metavar='path',
        type=str,
        nargs='+',
        help=
        'Path(s) to applications to deploy on the remote cluster. Given applications will be available in the CWD for command execution.'
    )
    deployparser.add_argument(
        '--mountpoint',
        metavar='path',
        type=str,
        default=start_defaults.mountpoint_path(),
        help='Mountpoint for CephFS on all nodes (default={}).'.format(
            start_defaults.mountpoint_path()))
    deployparser.add_argument(
        '--stripe',
        metavar='amount',
        type=int,
        default=defaults.stripe(),
        help=
        'Striping, in megabytes (default={}MB). Must be a multiple of 4. Make sure that every file is smaller than set stripe size.'
        .format(defaults.stripe()))
    deployparser.add_argument(
        '--multiplier',
        metavar='amount',
        type=int,
        default=1,
        help=
        'Data multiplier (default=1). Every file copied will receive "amount"-1 of hardlinks, to make the data look "amount" times larger.'
    )
    deployparser.add_argument('--silent',
                              help='If set, less boot output is shown.',
                              action='store_true')
    return [deployparser]
# --- Example #4 (scraped-example separator; score: 0) ---
def subparser(subparsers):
    '''Register the 'clean' subcommand for cleaning data from a RADOS-Ceph cluster.

    Args:
        subparsers: argparse subparsers action to register parsers on.

    Returns:
        list: the created parsers.'''
    # Bugfix: help text read "Clean data from a RADOS-Ceph a cluster." (doubled "a").
    cleanparser = subparsers.add_parser(
        'clean', help='Clean data from a RADOS-Ceph cluster.')
    cleanparser.add_argument(
        'paths',
        metavar='paths',
        type=str,
        nargs='*',
        help='Data path(s) to clean on the remote cluster (mountpoint path will be prepended). If no paths given, removes all data on remote.')
    cleanparser.add_argument(
        '--admin',
        metavar='id',
        dest='admin_id',
        type=int,
        default=None,
        help='ID of the Ceph admin node.')
    cleanparser.add_argument(
        '--mountpoint',
        metavar='path',
        type=str,
        default=start_defaults.mountpoint_path(),
        help='Mountpoint for CephFS on all nodes (default={}).'.format(start_defaults.mountpoint_path()))
    cleanparser.add_argument(
        '--silent',
        help='If set, less boot output is shown.',
        action='store_true')
    return [cleanparser]
def deploy(reservation,
           paths=None,
           key_path=None,
           admin_id=None,
           connectionwrapper=None,
           stripe=defaults.stripe(),
           copy_multiplier=1,
           link_multiplier=1,
           mountpoint_path=start_defaults.mountpoint_path(),
           silent=False):
    '''Deploy data on remote RADOS-Ceph clusters, on an existing reservation.
    Dataset sizes can be inflated on the remote, using 2 strategies:
     1. link multiplication: Every dataset file receives `x` hardlinks.
        The hardlinks ensure the dataset size appears to be `x` times larger, but in reality, just the original file consumes space.
        This method is very fast, but has drawbacks: Only the original files are stored by Ceph.
        When using the RADOS-Arrow connector, this means Arrow will spam only the nodes that contain the original data.
        E.g: If we deploy 1 file of 64MB, with link multiplier 1024, the data will appear to be 64GB.
             The storage space used on RADOS-Ceph will still be 64MB, because we have 1 real file of 64MB, and 1023 hardlinks to that 1 file.
             The actual data is only stored on 3 OSDs (with default Ceph Striping factor 3).
             Now, Arrow will spam all work to those 3 OSDs containing the data, while the rest is idle.
     2. file multiplication: Every dataset file receives `x` copies.
        This method is slower than the one listed above, because real data has to be copied.
        It also actually increases storage usage, contrary to above.
        However, because we multiply real data, the load is guaranteed to be balanced across nodes, as far as Ceph does that.

    Note that multiple multiplication techniques can be combined, in which case they stack.
    E.g: If we deploy 1 file of 64MB, with a copy multiplier 4 and a link multiplier 1024, we get 4 real files (1 original + 3 copies),
         and each file gets 1023 hardlinks assigned to it.
    Args:
        reservation (`metareserve.Reservation`): Reservation object with all nodes to start RADOS-Ceph on.
        key_path (optional str): Path to SSH key, which we use to connect to nodes. If `None`, we do not authenticate using an IdentityFile.
        admin_id (optional int): Node id of the ceph admin. If `None`, the node with lowest public ip value (string comparison) will be picked.
        connectionwrapper (optional RemotoSSHWrapper): If set, uses given connection, instead of building a new one.
        paths (optional list(str)): Data paths to offload to the remote cluster. Can be relative to CWD or absolute.
        stripe (optional int): Ceph object stripe property, in megabytes.
        copy_multiplier (optional int): If set to a value `x`, makes the dataset appear `x` times larger by copying every file `x`-1 times. Does nothing if `x`<=1.
        link_multiplier (optional int): If set to a value `x`, makes the dataset appear `x` times larger by adding `x`-1 hardlinks for every transferred file. Does nothing if `x`<=1.
        mountpoint_path (optional str): Path where CephFS is mounted on all nodes.
        silent (optional bool): If set, we only print errors and critical info. Otherwise, more verbose output.

    Returns:
        `True` on success, `False` otherwise.'''
    module = importer.import_full_path(
        fs.join(fs.dirname(fs.abspath(__file__)), 'internal', 'data_deploy',
                'rados_deploy.deploy.plugin.py'))
    args = []
    kwargs = {
        'admin_id': admin_id,
        'connectionwrapper': connectionwrapper,
        'stripe': stripe,
        'copy_multiplier': copy_multiplier,
        'link_multiplier': link_multiplier
    }
    # Bugfix: the original passed an undefined name 'dest' here. The deploy
    # destination is presumably the CephFS mountpoint on the remote nodes
    # (mountpoint_path was otherwise unused) — TODO confirm against the
    # plugin's execute() signature.
    return module.execute(reservation, key_path, paths, mountpoint_path,
                          silent, *args, **kwargs)
def memstore(reservation,
             key_path=None,
             admin_id=None,
             connectionwrapper=None,
             mountpoint_path=start_defaults.mountpoint_path(),
             silent=False):
    '''Stop a running RADOS-Ceph cluster that uses memstore.

    Args:
        reservation (`metareserve.Reservation`): Reservation object with all nodes the cluster runs on.
        key_path (optional str): Path to SSH key used to connect to nodes. If `None`, no IdentityFile authentication is used.
        admin_id (optional int): Node id of the ceph admin. If `None`, the node with the lowest public ip value (string comparison) is picked.
        connectionwrapper (optional RemotoSSHWrapper): If set, uses the given connection instead of building a new one.
        mountpoint_path (optional str): Path where CephFS is mounted on all nodes.
        silent (optional bool): If set, only errors and critical info are printed. Otherwise, more verbose output.

    Returns:
        `True` on success, `False` otherwise.'''
    if not reservation or len(reservation) == 0:
        raise ValueError('Reservation does not contain any items' +
                         (' (reservation=None)' if not reservation else ''))

    admin_picked, _ = _pick_admin(reservation, admin=admin_id)
    print('Picked admin node: {}'.format(admin_picked))

    # Remember whether we created the connection ourselves; if so, we are also
    # responsible for closing it at the end.
    local_connections = connectionwrapper == None
    if local_connections:
        ssh_kwargs = {'IdentitiesOnly': 'yes',
                      'StrictHostKeyChecking': 'no',
                      'User': admin_picked.extra_info['user']}
        if key_path:
            ssh_kwargs['IdentityFile'] = key_path
        connectionwrapper = get_wrapper(admin_picked,
                                        admin_picked.ip_public,
                                        silent=silent,
                                        ssh_params=ssh_kwargs)

    state_ok = _stop_rados(connectionwrapper.connection,
                           _generate_module_stop(),
                           reservation,
                           mountpoint_path,
                           silent=silent)

    if local_connections:
        close_wrappers([connectionwrapper])

    if not state_ok:
        printe('Stopping RADOS-Ceph failed on some nodes.')
        return False
    prints('Stopping RADOS-Ceph succeeded.')
    return True
def generate(reservation,
             key_path=None,
             admin_id=None,
             cmd=None,
             paths=None,
             stripe=defaults.stripe(),
             multiplier=1,
             mountpoint_path=start_defaults.mountpoint_path(),
             silent=False):
    '''Generate data on the RADOS-Ceph cluster, on an existing reservation.

    NOTE: not implemented yet. This function validates its arguments and then
    raises `NotImplementedError`.

    Args:
        reservation (`metareserve.Reservation`): Reservation object with all nodes to start RADOS-Ceph on.
        key_path (optional str): Path to SSH key, which we use to connect to nodes. If `None`, we do not authenticate using an IdentityFile.
        admin_id (optional int): Node id of the ceph admin. If `None`, the node with lowest public ip value (string comparison) will be picked.
        cmd (optional str): Command to execute on the remote cluster to generate the data.
        paths (optional list(str)): Data paths to offload to the remote cluster. Can be relative to CWD or absolute.
        stripe (optional int): Ceph object stripe property, in megabytes.
        multiplier (optional int): If set to a value `x`, makes the dataset appear `x` times larger by adding `x`-1 hardlinks for every transferred file. Does nothing if `x`<=1.
        mountpoint_path (optional str): Path where CephFS is mounted on all nodes.
        silent (optional bool): If set, we only print errors and critical info. Otherwise, more verbose output.

    Raises:
        ValueError: If the reservation is empty, the stripe size is invalid, or no command is given.
        NotImplementedError: Always, once validation passes (not implemented yet).

    Returns:
        `True` on success, `False` otherwise.'''
    if not reservation or len(reservation) == 0:
        raise ValueError('Reservation does not contain any items' +
                         (' (reservation=None)' if not reservation else ''))
    if stripe < 4:
        raise ValueError(
            'Stripe size must be equal to or greater than 4MB (and a multiple of 4MB)!'
        )
    if stripe % 4 != 0:
        raise ValueError('Stripe size must be a multiple of 4MB!')
    if not cmd:
        raise ValueError('Command to generate data not provided.')
    # Bugfix: removed the unreachable 'return True' that followed this raise.
    raise NotImplementedError('generate() is not implemented yet.')
def clean(reservation,
          paths,
          key_path=None,
          admin_id=None,
          connectionwrapper=None,
          mountpoint_path=start_defaults.mountpoint_path(),
          silent=False):
    '''Cleans data from the RADOS-Ceph cluster, on an existing reservation.
    Args:
        reservation (`metareserve.Reservation`): Reservation object with all nodes to start RADOS-Ceph on.
        paths (list(str)): Data paths to delete on the remote cluster. Mountpoint path is always prepended.
        key_path (optional str): Path to SSH key, which we use to connect to nodes. If `None`, we do not authenticate using an IdentityFile.
        admin_id (optional int): Node id of the ceph admin. If `None`, the node with lowest public ip value (string comparison) will be picked.
        connectionwrapper (optional RemotoSSHWrapper): If set, uses given connection, instead of building a new one.
        mountpoint_path (optional str): Path where CephFS is mounted on all nodes.
        silent (optional bool): If set, we only print errors and critical info. Otherwise, more verbose output.

    Returns:
        `True` on success, `False` otherwise.'''
    if (not reservation) or len(reservation) == 0:
        raise ValueError('Reservation does not contain any items' +
                         (' (reservation=None)' if not reservation else ''))

    admin_picked, _ = _pick_admin(reservation, admin=admin_id)
    print('Picked admin node: {}'.format(admin_picked))

    local_connections = connectionwrapper == None
    if local_connections:
        ssh_kwargs = {
            'IdentitiesOnly': 'yes',
            'User': admin_picked.extra_info['user'],
            'StrictHostKeyChecking': 'no'
        }
        if key_path:
            ssh_kwargs['IdentityFile'] = key_path
        # Consistency fix: every sibling function in this module calls
        # get_wrapper(node, ip, silent=silent, ...). This call was missing the
        # node argument and hardcoded silent=True, ignoring the caller's flag.
        connectionwrapper = get_wrapper(admin_picked,
                                        admin_picked.ip_public,
                                        silent=silent,
                                        ssh_params=ssh_kwargs)

    if not any(paths):
        # No paths given: wipe everything below the mountpoint.
        _, _, exitcode = remoto.process.check(
            connectionwrapper.connection,
            'sudo rm -rf {}/*'.format(mountpoint_path),
            shell=True)
        state_ok = exitcode == 0
    else:
        # Strip one leading '/' so the path joins below the mountpoint.
        paths = [x if x[0] != '/' else x[1:] for x in paths]
        # Robustness: cpu_count() may be 1, in which case cpu_count()-1 would
        # be an invalid max_workers value (0); always keep at least 1 worker.
        with concurrent.futures.ThreadPoolExecutor(
                max_workers=max(1, cpu_count() - 1)) as executor:
            if not silent:
                print('Deleting data...')
            futures_rm = [
                executor.submit(remoto.process.check,
                                connectionwrapper.connection,
                                'sudo rm -rf {}'.format(
                                    fs.join(mountpoint_path, path)),
                                shell=True) for path in paths
            ]

            state_ok = all(x.result()[2] == 0 for x in futures_rm)

    if state_ok:
        prints('Data deleted.')
    else:
        printe('Could not delete data.')
    if local_connections:
        close_wrappers([connectionwrapper])
    return state_ok
# --- Example #9 (scraped-example separator; score: 0) ---
def _node_is_osd(node):
    '''Returns `True` if given node carries the OSD designation in its extra info, `False` otherwise.'''
    return ('designations' in node.extra_info and Designation.OSD.name.lower()
            in node.extra_info['designations'].split(','))


def bluestore(reservation,
              key_path=None,
              admin_id=None,
              connectionwrapper=None,
              mountpoint_path=defaults.mountpoint_path(),
              osd_op_threads=defaults.osd_op_threads(),
              osd_pool_size=defaults.osd_pool_size(),
              osd_max_obj_size=defaults.osd_max_obj_size(),
              placement_groups=None,
              use_client_cache=True,
              device_path=None,
              silent=False,
              retries=defaults.retries()):
    '''Boot RADOS-Ceph on an existing reservation, running bluestore.
    Requires either a "device_path" key to be set in the extra info of all OSD nodes, or the "device_path" parameter must be set.
    Should point to device to use with bluestore on all nodes.
    Args:
        reservation (metareserve.Reservation): Reservation object with all nodes to start RADOS-Ceph on.
        key_path (optional str): Path to SSH key, which we use to connect to nodes. If `None`, we do not authenticate using an IdentityFile.
        admin_id (optional int): Node id of the ceph admin. If `None`, the node with lowest public ip value (string comparison) will be picked.
        connectionwrapper (optional RemotoSSHWrapper): If set, uses given connection, instead of building a new one.
        mountpoint_path (optional str): Path where CephFS will be mounted on all nodes.
        osd_op_threads (optional int): Number of op threads to use for each OSD. Make sure this number is not greater than the amount of cores each OSD has.
        osd_pool_size (optional int): Fragmentation of object to given number of OSDs. Must be less than or equal to amount of OSDs.
        osd_max_obj_size (int): Maximal object size in bytes. Normal=128*1024*1024 (128MB).
        placement_groups (optional int): Amount of placement groups in Ceph. If not set, we use the recommended formula `(num osds * 100) / (pool size)`, as found here: https://ceph.io/pgcalc/.
        use_client_cache (bool): Toggles using cephFS I/O cache.
        device_path (optional str): If set, overrides the "device_path" extra info for all nodes with given value. Should point to device to use with bluestore on all nodes.
        silent (optional bool): If set, we only print errors and critical info. Otherwise, more verbose output.
        retries (optional int): Number of tries we try to perform potentially-crashing operations.

    Returns:
        `(True, admin_node_id)` on success, `(False, None)` otherwise.'''
    if not reservation or len(reservation) == 0:
        raise ValueError('Reservation does not contain any items' +
                         (' (reservation=None)' if not reservation else ''))

    if isinstance(placement_groups, int):
        if placement_groups < 1:
            raise ValueError(
                'Amount of placement groups must be higher than zero!')
    else:  # We assume `placement_groups = None`
        placement_groups = _internal_compute_placement_groups(
            reservation=reservation)

    if device_path:  # We got an overriding device_path value.
        for x in reservation.nodes:
            if _node_is_osd(x):
                x.extra_info['device_path'] = device_path
    else:
        # Every OSD node must specify its own device_path in its extra info.
        # (The duplicated inline predicate of the original is now computed once.)
        missing = [
            x for x in reservation.nodes
            if _node_is_osd(x) and 'device_path' not in x.extra_info
        ]
        if missing:
            printe('Missing "device_path" specifier on the following nodes:\n{}'
                   .format('\n'.join('\t{}'.format(x) for x in missing)))
            return False, None

    admin_picked, _ = _internal_pick_admin(reservation, admin=admin_id)
    printc('Picked admin node: {}'.format(admin_picked), Color.CAN)

    local_connections = connectionwrapper == None

    if local_connections:
        ssh_kwargs = {
            'IdentitiesOnly': 'yes',
            'User': admin_picked.extra_info['user'],
            'StrictHostKeyChecking': 'no'
        }
        if key_path:
            ssh_kwargs['IdentityFile'] = key_path
        connectionwrapper = get_wrapper(admin_picked,
                                        admin_picked.ip_public,
                                        silent=silent,
                                        ssh_params=ssh_kwargs)
    rados_module = _generate_module_start()
    state_ok = _start_rados(connectionwrapper.connection,
                            rados_module,
                            reservation,
                            mountpoint_path,
                            osd_op_threads,
                            osd_pool_size,
                            osd_max_obj_size,
                            placement_groups,
                            use_client_cache,
                            silent=silent,
                            retries=retries)

    if local_connections:
        close_wrappers([connectionwrapper])

    if state_ok:
        # Bugfix: message read 'Started RADOS-Ceph succeeded.'; made it
        # grammatical and consistent with the sibling stop functions.
        prints('Starting RADOS-Ceph succeeded.')
        return True, admin_picked.node_id
    else:
        printe('Starting RADOS-Ceph failed on some nodes.')
        return False, None
# --- Example #10 (scraped-example separator; score: 0) ---
def subparser(subparsers):
    '''Register the 'start' subcommand and its 'memstore'/'bluestore' subsubcommands.

    Args:
        subparsers: argparse subparsers action to register parsers on.

    Returns:
        list: the created parsers, in order [start, memstore, bluestore].'''
    startparser = subparsers.add_parser('start',
                                        help='Start RADOS-Ceph on a cluster.')
    startparser.add_argument(
        '--admin',
        metavar='id',
        dest='admin_id',
        type=int,
        default=None,
        help='ID of the node that will be the Ceph admin node.')
    startparser.add_argument(
        '--mountpoint',
        metavar='path',
        type=str,
        default=defaults.mountpoint_path(),
        help='Mountpoint for CephFS on all nodes (default={}).'.format(
            defaults.mountpoint_path()))
    startparser.add_argument(
        '--osd-op-threads',
        metavar='amount',
        dest='osd_op_threads',
        type=int,
        default=defaults.osd_op_threads(),
        help=
        'Number of op threads to use for each OSD (default={}). Make sure this number is not greater than the amount of cores each OSD has.'
        .format(defaults.osd_op_threads()))
    startparser.add_argument(
        '--osd-pool-size',
        metavar='amount',
        dest='osd_pool_size',
        type=int,
        default=defaults.osd_pool_size(),
        help='Fragmentation of objects across this number of OSDs (default={}).'
        .format(defaults.osd_pool_size()))
    startparser.add_argument(
        '--osd-max-obj-size',
        metavar='bytes',
        dest='osd_max_obj_size',
        type=int,
        default=defaults.osd_max_obj_size(),
        help=
        'Maximum size (in bytes) for a single object (default={}). If we try to write objects larger than this size, the cluster will permanently hang.'
        .format(defaults.osd_max_obj_size()))
    # Bugfix: the help string has no '{}' placeholder, so the trailing
    # '.format(defaults.mountpoint_path())' was a no-op leftover; removed it.
    startparser.add_argument(
        '--placement-groups',
        metavar='amount',
        dest='placement_groups',
        type=int,
        default=None,
        help=
        'Amount of placement groups in Ceph. By default, we use the formula `(num osds * 100) / (pool size)`, as found here: https://ceph.io/pgcalc/.'
    )
    # Bugfix: this is a boolean flag ("If set, ..."), but 'action' was missing,
    # so argparse required a value after the option. Added action='store_true'.
    startparser.add_argument(
        '--disable-client-cache',
        dest='disable_client_cache',
        action='store_true',
        help='If set, disables the I/O cache on the clients.')
    startparser.add_argument('--silent',
                             help='If set, less boot output is shown.',
                             action='store_true')
    startparser.add_argument(
        '--retries',
        metavar='amount',
        type=int,
        default=defaults.retries(),
        help='Amount of retries to use for risky operations (default={}).'.
        format(defaults.retries()))

    subsubparsers = startparser.add_subparsers(help='Subsubcommands',
                                               dest='subcommand')

    memstoreparser = subsubparsers.add_parser('memstore',
                                              help='''Start a memstore cluster.
Memstore stores all data inside the RAM of each Ceph OSD node.''')
    memstoreparser.add_argument(
        '--storage-size',
        metavar='amount',
        dest='storage_size',
        type=str,
        default=None,
        help=
        'Amount of bytes of RAM to allocate for storage with memstore (default={}). Value should not be greater than the amount of RAM available on each OSD node.'
        .format(defaults.memstore_storage_size()))

    bluestoreparser = subsubparsers.add_parser(
        'bluestore',
        help='''Start a bluestore cluster.
Bluestore stores all data on a separate device, using its own filesystem.
Each node must provide extra info:
 - device_path: Path to storage device, e.g. "/dev/nvme0n1p4".''')
    bluestoreparser.add_argument(
        '--device-path',
        metavar='path',
        dest='device_path',
        type=str,
        default=None,
        help='Overrides "device_path" specification for all nodes.')

    return [startparser, memstoreparser, bluestoreparser]