Exemplo n.º 1
0
def _get_zpool_datasets(pool_name, drives):
    """Return datasets that belong to *pool_name*, keyed by dataset guid.

    Each entry records the dataset name/path, its fabricated block-device
    identifier, size, and the drives backing the pool.
    """
    listing = AgentShell.try_run(['zfs', 'list', '-H', '-o', 'name,avail,guid'])

    datasets = {}

    if listing.strip() == "no datasets available":
        return datasets

    prefix = "%s/" % pool_name

    for row in listing.split('\n'):
        if not row:
            continue

        name, size_str, uuid = row.split()
        size = util.human_to_bytes(size_str)

        if not name.startswith(prefix):
            continue

        # This will need discussion, but for now fabricate a major:minor.
        # Do we ever use them as numbers?
        datasets[uuid] = {
            "name": name,
            "path": name,
            "block_device": "zfsset:%s" % uuid,
            "uuid": uuid,
            "size": size,
            "drives": drives
        }

        daemon_log.debug("zfs mount '%s'" % name)

    return datasets
Exemplo n.º 2
0
def process_zfs_mount(device, data, zfs_mounts):
    """Resolve a zfs-backed lustre device to its (label, uuid) pair.

    Looks up the device's pool in the zed structures, then the nested
    dataset, to read the lustre svname property (the target label).
    Returns (None, None) when the device is not zfs-backed.
    """
    pool_name = device.split("/")[0]
    if not pool_name:
        return None, None

    mounted_pools = [name for name, _, _ in zfs_mounts]
    if pool_name not in mounted_pools:
        daemon_log.debug("lustre device has no mounted zfs pool")
        # Deliberately fall through to the lookup below: the pool may have
        # canmount=off and we do not have the pool properties here to
        # check for that.

    try:
        pool = next(p for p in data["zed"].values() if p["name"] == pool_name)
        dataset = next(d for d in pool["datasets"] if d["name"] == device)

        # "lustre:svname" used to be the fsname property
        fs_label = next(prop["value"] for prop in dataset["props"]
                        if prop["name"] == "lustre:svname")

        return fs_label, dataset["guid"]
    except StopIteration:
        daemon_log.debug("lustre device is not zfs")
        return None, None
Exemplo n.º 3
0
def process_zfs_mount(device, data, zfs_mounts):
    """Resolve a zfs-backed lustre device to (label, uuid, device path).

    Looks up the device's pool in the zed structures, then the nested
    dataset, to read the lustre svname property (the target label).
    Returns (None, None, None) when the pool is not among *zfs_mounts*.
    """
    pool_name = device.split('/')[0]

    mounted_pools = [entry[0] for entry in zfs_mounts]
    if pool_name not in mounted_pools:
        daemon_log.debug('lustre device is not zfs')
        return None, None, None

    pool = next(p for p in data['zed'].values() if p['name'] == pool_name)
    dataset = next(d for d in pool['datasets'] if d['name'] == device)

    # 'lustre:svname' used to be fsname
    fs_label = next(prop['value'] for prop in dataset['props']
                    if prop['name'] == 'lustre:svname')

    fs_uuid = dataset['guid']

    # note: this will be one of the many partitions that belong to the pool
    new_device = next(child['Disk']['path']
                      for child in pool['vdev']['Root']['children']
                      if child.get('Disk'))

    return fs_label, fs_uuid, new_device
Exemplo n.º 4
0
    def poll(self, plugin_name):
        """
        For any plugins that don't have a session, try asking for one.
        For any ongoing sessions, invoke the poll callback

        :param plugin_name: name of the device plugin to poll
        """

        now = datetime.datetime.now()

        try:
            session = self._client.sessions.get(plugin_name)
        except KeyError:
            # No session yet: request one, subject to exponential backoff.
            #
            if plugin_name in self._client.sessions._requested_at:
                next_request_at = (
                    self._client.sessions._requested_at[plugin_name] +
                    self._client.sessions._backoffs[plugin_name])
                if now < next_request_at:
                    # We're still in our backoff period, skip requesting a session
                    daemon_log.debug("Delaying session request until %s" %
                                     next_request_at)
                    return
                else:
                    # The previous request went unanswered: double the
                    # backoff, capped at MAX_SESSION_BACKOFF.
                    if (self._client.sessions._backoffs[plugin_name] <
                            MAX_SESSION_BACKOFF):
                        self._client.sessions._backoffs[plugin_name] *= 2

            daemon_log.debug("Requesting session for plugin %s" % plugin_name)
            self._client.sessions._requested_at[plugin_name] = now
            self.put(Message("SESSION_CREATE_REQUEST", plugin_name))
        else:
            try:
                data = session.poll()
            except Exception:
                # Any error raised by the plugin tears down its session and
                # immediately requests a fresh one.
                backtrace = "\n".join(
                    traceback.format_exception(*(sys.exc_info())))
                daemon_log.error("Error in plugin %s: %s" %
                                 (plugin_name, backtrace))
                self._client.sessions.terminate(plugin_name)
                self.put(Message("SESSION_CREATE_REQUEST", plugin_name))
            else:
                if data is not None:
                    # Wrap the plugin's output in DevicePluginMessage(s)
                    # as needed before sending on the session.
                    if isinstance(data, DevicePluginMessageCollection):
                        for message in data:
                            session.send_message(
                                DevicePluginMessage(message,
                                                    priority=data.priority))
                    elif isinstance(data, DevicePluginMessage):
                        session.send_message(data)
                    else:
                        session.send_message(DevicePluginMessage(data))
Exemplo n.º 5
0
def process_lvm_mount(device, data):
    """Resolve an lvm-backed lustre device to its (label, lvUuid) pair.

    Scans data["blockDevices"] for the logical volume whose path list
    contains *device*, then derives the filesystem label from the
    /dev/disk/by-label/ symlink among that LV's paths.

    :param device: device path the lustre target is mounted from
    :param data: report containing a "blockDevices" mapping of device info
    :return: (fs_label, lv_uuid), or (None, None) if *device* is not lvm
    """
    try:
        # .values() instead of the python-2-only .itervalues(): works on
        # both python 2 and python 3.
        bdev = next((v["paths"], v["lvUuid"])
                    for v in data["blockDevices"].values()
                    if device in v["paths"] and v.get("lvUuid"))
    except StopIteration:
        daemon_log.debug("lustre device is not lvm")
        return None, None

    label_prefix = "/dev/disk/by-label/"
    fs_label = next(
        p.split(label_prefix, 1)[1] for p in bdev[0]
        if p.startswith(label_prefix))

    return fs_label, bdev[1]
Exemplo n.º 6
0
def find_device_and_children(device_path):
    """Return the devices sharing *device_path*'s normalized path prefix.

    A zfs pool's disk and all of its partitions share the prefix, e.g.
    scsi-0QEMU_QEMU_HARDDISK_WD-WMAP3333333 includes
    scsi-0QEMU_QEMU_HARDDISK_WD-WMAP3333333-part1.
    """
    found = []

    try:
        normalized = ndp.normalized_device_path(device_path)

        # All partitions of the disk are children of this zfs pool.
        for dev in ndp.find_normalized_start(normalized):
            daemon_log.debug("zfs device '%s'" % dev)
            found.append(dev)
    except KeyError:
        pass

    return found
Exemplo n.º 7
0
def process_lvm_mount(device, data):
    """Resolve an lvm-backed lustre device to (label, lvUuid, new_device).

    Scans data['blockDevices'] for the logical volume whose path list
    contains *device*, then derives the filesystem label from the
    /dev/disk/by-label/ symlink among that LV's paths.

    :param device: device path the lustre target is mounted from
    :param data: report containing a 'blockDevices' mapping of device info
    :return: (fs_label, lv_uuid, None), or (None, None, None) if not lvm
    """
    try:
        # .values() instead of the python-2-only .itervalues(): works on
        # both python 2 and python 3.
        bdev = next(
            (v['paths'], v['lvUuid']) for v in data['blockDevices'].values()
            if device in v['paths'] and v.get('lvUuid')
        )
    except StopIteration:
        daemon_log.debug('lustre device is not lvm')
        return None, None, None

    label_prefix = '/dev/disk/by-label/'
    fs_label = next(
        p.split(label_prefix, 1)[1] for p in bdev[0] if p.startswith(label_prefix)
    )

    return fs_label, bdev[1], None
Exemplo n.º 8
0
def write_to_store(key, value):
    """
    Persist *value* under *key* in the zfs object store file.

    Reads the current store, updates only the given key (all other keys
    are preserved), and writes the whole store back as JSON.

    :param key: key to update value for store
    :param value: value to assign to given key
    :return: None
    """
    daemon_log.debug('write_to_store(): writing zfs data to %s. key: %s' % (ZFS_OBJECT_STORE_PATH, key))

    store = read_store()
    store[key] = value

    with open(ZFS_OBJECT_STORE_PATH, 'w') as store_file:
        store_file.write(json.dumps(store))
Exemplo n.º 9
0
    def _get_zpool_datasets(self, pool_name, zpool_uuid, drives,
                            block_devices):
        """Collect datasets under *pool_name*, registering their block devices.

        Returns a dict keyed by dataset guid; also records a block-device
        node in *block_devices* and primes the BlockDevice/FileSystem
        caches for each dataset found.
        """
        listing = AgentShell.try_run(
            ['zfs', 'list', '-H', '-o', 'name,avail,guid'])

        datasets = {}

        if listing.strip() == "no datasets available":
            return datasets

        prefix = "%s/" % pool_name

        for row in listing.split('\n'):
            if not row:
                continue

            name, size_str, uuid = row.split()
            size = util.human_to_bytes(size_str)

            if not name.startswith(prefix):
                continue

            # This will need discussion, but for now fabricate a major:minor.
            # Do we ever use them as numbers?
            major_minor = "zfsset:%s" % (len(self.datasets) + 1)
            block_devices.block_device_nodes[major_minor] = {
                'major_minor': major_minor,
                'path': name,
                'serial_80': None,
                'serial_83': None,
                'size': size,
                'filesystem_type': 'zfs',
                'parent': None
            }

            # Do this to cache the device, type see blockdevice and
            # filesystem for info.
            BlockDevice('zfs', name)
            FileSystem('zfs', name)

            datasets[uuid] = {
                "name": name,
                "path": name,
                "block_device": major_minor,
                "uuid": uuid,
                "size": size,
                "drives": drives
            }

            daemon_log.debug("zfs mount '%s'" % name)

        return datasets
Exemplo n.º 10
0
def install_packages(repos, packages):
    """
    Explicitly evaluate and install or update any specific-version dependencies and satisfy even if
    that involves installing an older package than is already installed.
    Primary use case is installing lustre-modules, which depends on a specific kernel package.

    :param repos: List of strings, yum repo names
    :param packages: List of strings, yum package names
    :return: package report of the format given by the lustre device plugin
    """
    if packages != []:
        # Work on a copy so the caller's list is not mutated when we append
        # version-pinned requirements below.
        packages = list(packages)

        yum_util('clean')

        out = yum_util('requires', enablerepo=repos, packages=packages)
        for requirement in [l.strip() for l in out.strip().split("\n")]:
            # Requirements of the form "<name> = <version>" pin an exact
            # version; install that exact package explicitly as well.
            match = re.match(r"([^\)/]*) = (.*)", requirement)
            if match:
                require_package, require_version = match.groups()
                packages.append("%s-%s" % (require_package, require_version))

        yum_util('install', enablerepo=repos, packages=packages)

        # So now we have installed the packages requested, we will also make sure that any installed packages we
        # have that are already installed are updated to our presumably better versions.
        update_packages = yum_check_update(repos)

        if update_packages:
            daemon_log.debug(
                "The following packages need update after we installed IML packages %s"
                % update_packages)
            yum_util('update', packages=update_packages, enablerepo=repos)

        error = _check_HYD4050()

        if error:
            return agent_error(error)

    return agent_result(lustre.scan_packages())
    def _dev_major_minor(self, path):
        """ Return a string if 'path' is a block device or link to one, else return None

        Stats *path* with a short retry window so devices that vanish
        momentarily (e.g. during a partprobe) are not reported as gone.
        Responding/non-responding paths are tracked in
        self.non_existent_paths, and the last good stat result per path is
        cached in self.previous_path_status as a fallback.
        """

        file_status = None
        retries = self.MAXRETRIES
        while retries > 0:
            try:
                file_status = os.stat(path)

                # Path responded: clear it from the known-missing set.
                if path in self.non_existent_paths:
                    self.non_existent_paths.discard(path)
                    daemon_log.debug('New device started to respond %s' % path)

                self.previous_path_status[path] = file_status
                break
            except OSError as os_error:
                # Anything other than "doesn't exist" is unexpected: re-raise.
                if os_error.errno not in [errno.ENOENT, errno.ENOTDIR]:
                    raise

                # An OSError could be raised because a path genuinely doesn't
                # exist, but it also can be the result of conflicting with
                # actions that cause devices to disappear momentarily, such as
                # during a partprobe while it reloads the partition table.
                # So we retry for a short window to catch those devices that
                # just disappear momentarily.
                time.sleep(0.1)
                # A path already known to be missing burns all remaining
                # retries at once, so we don't wait the full window for it
                # on every call.
                retries -= retries if path in self.non_existent_paths else 1

        if file_status is None:
            if path not in self.non_existent_paths:
                self.non_existent_paths.add(path)
                daemon_log.debug('New device failed to respond %s' % path)

            if path not in self.previous_path_status:
                return None

            # Fall back to the last stat we saw; pop() consumes it, so a
            # persistently failing path eventually returns None.
            file_status = self.previous_path_status.pop(path)
            daemon_log.debug(
                'Device failed to respond but stored file_status used')

        if stat.S_ISBLK(file_status.st_mode):
            return "%d:%d" % (os.major(
                file_status.st_rdev), os.minor(file_status.st_rdev))
        else:
            return None
Exemplo n.º 12
0
    def send(self):
        """Return True if the POST succeeds, else False

        Drains queued messages (retry queue first, then primary queue) into
        one POST envelope, respecting MAX_BYTES_PER_POST; a message that
        would overflow the current batch is re-queued for the next POST.
        On HTTP failure, sessions whose DATA messages were dropped are
        terminated. Message completion callbacks fire either way.
        """
        messages = []
        completion_callbacks = []

        post_envelope = {
            "messages": [],
            "server_boot_time": self._client.boot_time.isoformat() + "Z",
            "client_start_time": self._client.start_time.isoformat() + "Z",
        }

        # Any message we drop will need its session killed
        kill_sessions = set()

        # Running total of the encoded POST size, seeded with the empty
        # envelope so its overhead is accounted for in the size check below.
        messages_bytes = len(json.dumps(post_envelope))
        while True:
            # Prefer previously-failed messages over fresh ones.
            try:
                message = self._retry_messages.get_nowait()
                daemon_log.debug("HttpWriter got message from retry queue")
            except Queue.Empty:
                try:
                    message = self._messages.get_nowait()
                    daemon_log.debug(
                        "HttpWriter got message from primary queue")
                except Queue.Empty:
                    break

            if message.callback:
                completion_callbacks.append(message.callback)
            message_length = len(json.dumps(message.dump(self._client._fqdn)))

            if message_length > MAX_BYTES_PER_POST:
                # Warn only: the check below still lets this message through
                # when it is the first message of the batch.
                daemon_log.warning("Oversized message %s/%s: %s" % (
                    message_length,
                    MAX_BYTES_PER_POST,
                    message.dump(self._client._fqdn),
                ))

            if messages and message_length > MAX_BYTES_PER_POST - messages_bytes:
                # This message will not fit into this POST: pop it back into the queue
                daemon_log.info(
                    "HttpWriter message %s overflowed POST %s/%s (%d "
                    "messages), enqueuing" % (
                        message.dump(self._client._fqdn),
                        message_length,
                        MAX_BYTES_PER_POST,
                        len(messages),
                    ))
                self._retry_messages.put(message)
                break

            messages.append(message)
            messages_bytes += message_length

        daemon_log.debug("HttpWriter sending %s messages" % len(messages))
        try:
            post_envelope["messages"] = [
                m.dump(self._client._fqdn) for m in messages
            ]
            self._client.post(post_envelope)
        except HttpError:
            daemon_log.warning("HttpWriter: request failed")
            # Terminate any sessions which we've just droppped messages for
            for message in messages:
                if message.type == "DATA":
                    kill_sessions.add(message.plugin_name)
            for plugin_name in kill_sessions:
                self._client.sessions.terminate(plugin_name)

            return False
        else:
            return True
        finally:
            # Callbacks fire whether or not the POST succeeded.
            for callback in completion_callbacks:
                callback()
Exemplo n.º 13
0
 def join(self):
     """Wait for the writer to finish, then terminate all sessions."""
     daemon_log.debug("Client joining...")
     # NOTE(review): reader.join() is commented out -- the reader is not
     # joined here; confirm this is intentional.
     # self.reader.join()
     self.writer.join()
     self.sessions.terminate_all()
     daemon_log.debug("Client joined")
Exemplo n.º 14
0
 def stop(self):
     """Stop the reader and writer, then set the stopped event."""
     daemon_log.debug("Client stopping...")
     self.reader.stop()
     self.writer.stop()
     self.stopped.set()