示例#1
0
def set_sriov_vf_vm(zkhandler, vm_uuid, node, vf, vf_macaddr, vf_type):
    """
    Record that SR-IOV VF `vf` on `node` is in use by the VM `vm_uuid`.

    The VF's "used", "used_by" and "mac" keys are written in a single
    zkhandler.write() call. For a VF of type "hostdev", the VM's migrate method
    is written as "shutdown" in that same call.

    Returns False if the node or the VF fails validation, True otherwise.
    """
    # Bail out early on an unknown node or an unknown VF
    if not common.verifyNode(zkhandler, node):
        return False
    if not getSRIOVVFInformation(zkhandler, node, vf):
        return False

    vf_fields = (
        ("sriov_vf.used", "True"),
        ("sriov_vf.used_by", vm_uuid),
        ("sriov_vf.mac", vf_macaddr),
    )
    updates = [
        (("node.sriov.vf", node, field, vf), value) for field, value in vf_fields
    ]

    # Hostdev type SR-IOV prevents the guest from live migrating
    if vf_type == "hostdev":
        updates += [(("domain.meta.migrate_method", vm_uuid), "shutdown")]

    zkhandler.write(updates)
    return True
示例#2
0
def secondary_node(zkhandler, node):
    """
    Request that `node` be put into secondary router mode.

    The request is made by writing "none" to the "base.config.primary_node" key.
    It is refused if the node does not exist, if its mode is "hypervisor", or if
    its daemon state is not "run". A node whose router state is already
    "secondary" is reported as success without writing anything.

    Returns a (success, message) tuple.
    """
    if not common.verifyNode(zkhandler, node):
        message = 'ERROR: No node named "{}" is present in the cluster.'.format(node)
        return False, message

    # Only coordinators take part in router mode changes
    if zkhandler.read(("node.mode", node)) == "hypervisor":
        message = 'ERROR: Cannot change router mode on non-coordinator node "{}"'.format(
            node
        )
        return False, message

    # The node daemon must be running
    if zkhandler.read(("node.state.daemon", node)) != "run":
        return False, 'ERROR: Node "{}" is not active'.format(node)

    # Nothing to do when the node is already secondary
    if zkhandler.read(("node.state.router", node)) == "secondary":
        return True, 'Node "{}" is already in secondary router mode.'.format(node)

    zkhandler.write([("base.config.primary_node", "none")])
    return True, "Setting node {} in secondary router mode.".format(node)
示例#3
0
def add_osd_db_vg(zkhandler, node, device):
    """
    Ask the cluster to create an OSD database VG on `device` of `node`.

    The request is posted as a "db_vg_add <node>,<device>" string on the
    "base.cmd.ceph" key. After a short pause the key is read back under a read
    lock and its first whitespace-separated word is taken as the result. The
    key is then cleared under a write lock.

    Returns a (success, message) tuple.
    """
    # Verify the target node exists
    node_exists = common.verifyNode(zkhandler, node)
    if not node_exists:
        return False, 'ERROR: No node named "{}" is present in the cluster.'.format(
            node
        )

    # Post the command for the cluster to pick up
    command = "db_vg_add {},{}".format(node, device)
    zkhandler.write([("base.cmd.ceph", command)])

    # Wait 1/2 second for the cluster to get the message and start working
    time.sleep(0.5)

    # Acquire a read lock, so we get the return exclusively
    with zkhandler.readlock("base.cmd.ceph"):
        try:
            status = zkhandler.read("base.cmd.ceph").split()[0]
            if status != "success-db_vg_add":
                outcome = (
                    False,
                    "ERROR: Failed to create new OSD database VG; check node logs for details.",
                )
            else:
                outcome = (
                    True,
                    'Created new OSD database VG at "{}" on node "{}".'.format(
                        device, node
                    ),
                )
        except Exception:
            # Any failure to read or split the reply is reported as the command
            # having been ignored
            outcome = (False, "ERROR: Command ignored by node.")

    # Acquire a write lock to ensure things go smoothly, then clear the command key
    with zkhandler.writelock("base.cmd.ceph"):
        time.sleep(0.5)
        zkhandler.write([("base.cmd.ceph", "")])

    return outcome
示例#4
0
def get_info_sriov_vf(zkhandler, node, vf):
    """
    Look up the information for SR-IOV VF `vf` on `node`.

    Returns (True, <VF information>) on success, or (False, <error message>) if
    the node is invalid or getSRIOVVFInformation() returns nothing for the VF.
    """
    if not common.verifyNode(zkhandler, node):
        return False, 'ERROR: Specified node "{}" is invalid.'.format(node)

    details = getSRIOVVFInformation(zkhandler, node, vf)
    if details:
        return True, details

    return False, 'ERROR: Could not find SR-IOV VF "{}" on node "{}"'.format(vf, node)
示例#5
0
def get_info(zkhandler, node):
    """
    Fetch the information for `node` via getNodeInformation().

    Returns (True, <node information>) on success, or (False, <error message>)
    if the node is unknown or no information could be obtained.
    """
    node_exists = common.verifyNode(zkhandler, node)
    if not node_exists:
        return False, 'ERROR: No node named "{}" is present in the cluster.'.format(
            node
        )

    details = getNodeInformation(zkhandler, node)
    if details:
        return True, details

    return False, 'ERROR: Could not get information about node "{}".'.format(node)
示例#6
0
def add_osd(zkhandler, node, device, weight, ext_db_flag=False, ext_db_ratio=0.05):
    """
    Ask the cluster to create a new OSD on block device `device` of `node`.

    The request is refused if the node does not exist or if verifyOSDBlock()
    reports the device as used by an existing OSD. Otherwise the string
    "osd_add <node>,<device>,<weight>,<ext_db_flag>,<ext_db_ratio>" is posted on
    the "base.cmd.ceph" key. After a short pause the key is read back under a
    read lock and its first whitespace-separated word is taken as the result.
    The key is then cleared under a write lock.

    Returns a (success, message) tuple.
    """
    # Verify the target node exists
    node_exists = common.verifyNode(zkhandler, node)
    if not node_exists:
        return False, 'ERROR: No node named "{}" is present in the cluster.'.format(
            node
        )

    # Verify target block device isn't in use
    owning_osd = verifyOSDBlock(zkhandler, node, device)
    if owning_osd:
        in_use_message = 'ERROR: Block device "{}" on node "{}" is used by OSD "{}"'.format(
            device, node, owning_osd
        )
        return False, in_use_message

    # Post the command for the cluster to pick up
    command = "osd_add {},{},{},{},{}".format(
        node, device, weight, ext_db_flag, ext_db_ratio
    )
    zkhandler.write([("base.cmd.ceph", command)])

    # Wait 1/2 second for the cluster to get the message and start working
    time.sleep(0.5)

    # Acquire a read lock, so we get the return exclusively
    with zkhandler.readlock("base.cmd.ceph"):
        try:
            status = zkhandler.read("base.cmd.ceph").split()[0]
            if status != "success-osd_add":
                outcome = (
                    False,
                    "ERROR: Failed to create new OSD; check node logs for details.",
                )
            else:
                outcome = (
                    True,
                    'Created new OSD with block device "{}" on node "{}".'.format(
                        device, node
                    ),
                )
        except Exception:
            # Any failure to read or split the reply is reported as the command
            # having been ignored
            outcome = (False, "ERROR: Command ignored by node.")

    # Acquire a write lock to ensure things go smoothly, then clear the command key
    with zkhandler.writelock("base.cmd.ceph"):
        time.sleep(0.5)
        zkhandler.write([("base.cmd.ceph", "")])

    return outcome
示例#7
0
def get_node_log(zkhandler, node, lines=2000):
    """
    Return the last `lines` lines of the log stored for `node`.

    The log is read from the ("logs.messages", node) key and split on newlines.

    Returns (True, <log text>) on success, where the text is empty if no log is
    stored or if `lines` is zero or negative. Returns (False, <error message>)
    if the node is unknown.
    """
    # Verify node is valid
    if not common.verifyNode(zkhandler, node):
        return False, 'ERROR: No node named "{}" is present in the cluster.'.format(
            node
        )

    # Get the data from ZK
    node_log = zkhandler.read(("logs.messages", node))

    if node_log is None:
        return True, ""

    # Guard the slice below: [-0:] is [0:], which would return the entire log
    # rather than nothing, and a negative count would drop the head of the log
    # instead of keeping the tail.
    if lines <= 0:
        return True, ""

    # Shrink the log buffer to length lines
    shrunk_log = node_log.split("\n")[-lines:]

    return True, "\n".join(shrunk_log)
示例#8
0
def get_list_sriov_vf(zkhandler, node, pf=None):
    """
    List the SR-IOV VFs of `node`, optionally restricted to those whose "pf"
    field equals `pf`.

    VF names are taken from the children of ("node.sriov.vf", node), ordered by
    common.sortInterfaceNames(). VFs whose information lookup fails are skipped.

    Returns (True, <list of VF information>), or (False, <error message>) if
    the node is invalid.
    """
    if not common.verifyNode(zkhandler, node):
        return False, 'ERROR: Specified node "{}" is invalid.'.format(node)

    vf_names = common.sortInterfaceNames(zkhandler.children(("node.sriov.vf", node)))

    matched = []
    for vf_name in vf_names:
        found, details = get_info_sriov_vf(zkhandler, node, vf_name)
        if not found:
            continue
        # With no PF filter every VF is kept; otherwise only VFs on that PF
        if pf is None or details["pf"] == pf:
            matched.append(details)

    return True, matched
示例#9
0
def ready_node(zkhandler, node, wait=False):
    """
    Request that `node` be restored to active service.

    The request is made by writing "unflush" to the node's domain state key.
    With `wait` set, the call polls once per second until that key no longer
    reads "unflush". A node whose domain state is already "ready" is reported
    as success without writing anything.

    Returns a (success, message) tuple.
    """
    if not common.verifyNode(zkhandler, node):
        return False, 'ERROR: No node named "{}" is present in the cluster.'.format(
            node
        )

    state_key = ("node.state.domain", node)

    if zkhandler.read(state_key) == "ready":
        return True, "Hypervisor {} is already ready.".format(node)

    # Request the unflush through the node's domain state
    zkhandler.write([(state_key, "unflush")])

    if not wait:
        return True, "Restoring hypervisor {} to active service.".format(node)

    # Block until the state moves on from "unflush"
    while zkhandler.read(state_key) == "unflush":
        time.sleep(1)

    return True, "Restored hypervisor {} to active service.".format(node)
示例#10
0
def flush_node(zkhandler, node, wait=False):
    """
    Request that `node` be flushed of running VMs.

    The request is made by writing "flush" to the node's domain state key. With
    `wait` set, the call polls once per second until that key no longer reads
    "flush". A node whose domain state is already "flushed" is reported as
    success without writing anything.

    Returns a (success, message) tuple.
    """
    if not common.verifyNode(zkhandler, node):
        return False, 'ERROR: No node named "{}" is present in the cluster.'.format(
            node
        )

    state_key = ("node.state.domain", node)

    if zkhandler.read(state_key) == "flushed":
        return True, "Hypervisor {} is already flushed.".format(node)

    # Request the flush through the node's domain state
    zkhandler.write([(state_key, "flush")])

    if not wait:
        return True, "Flushing hypervisor {} of running VMs.".format(node)

    # Block until the state moves on from "flush"
    while zkhandler.read(state_key) == "flush":
        time.sleep(1)

    return True, "Flushed hypervisor {} of running VMs.".format(node)
示例#11
0
def unset_sriov_vf_vm(zkhandler, node, vf):
    """
    Release SR-IOV VF `vf` on `node` from whichever VM was recorded as using it.

    The VF's "used" key is written as "False", its "used_by" key is emptied,
    and its "mac" key is set to the value currently stored under its "phy_mac"
    key.

    Returns False if the node or the VF fails validation, True otherwise.
    """
    # Bail out early on an unknown node or an unknown VF
    if not common.verifyNode(zkhandler, node):
        return False
    if not getSRIOVVFInformation(zkhandler, node, vf):
        return False

    # The MAC is reset to whatever is stored as the VF's phy_mac
    phy_mac = zkhandler.read(("node.sriov.vf", node, "sriov_vf.phy_mac", vf))

    zkhandler.write(
        [
            (("node.sriov.vf", node, "sriov_vf.used", vf), "False"),
            (("node.sriov.vf", node, "sriov_vf.used_by", vf), ""),
            (("node.sriov.vf", node, "sriov_vf.mac", vf), phy_mac),
        ]
    )
    return True
示例#12
0
def set_sriov_vf_config(
    zkhandler,
    node,
    vf,
    vlan_id=None,
    vlan_qos=None,
    tx_rate_min=None,
    tx_rate_max=None,
    link_state=None,
    spoof_check=None,
    trust=None,
    query_rss=None,
):
    """
    Update configuration values of SR-IOV VF `vf` on `node`.

    Each keyword argument that is not None is written to the matching
    "sriov_vf.config.<name>" key of the VF; arguments left as None are not
    touched. All writes are issued in one zkhandler.write() call.

    Returns a (success, message) tuple. It fails if the node or VF is invalid,
    if no argument was given, or if the write reports failure.
    """
    if not common.verifyNode(zkhandler, node):
        return False, 'ERROR: Specified node "{}" is invalid.'.format(node)

    if not getSRIOVVFInformation(zkhandler, node, vf):
        return False, 'ERROR: Could not find SR-IOV VF "{}" on node "{}".'.format(
            vf, node
        )

    # Config key -> requested value, in the order the writes are issued
    requested = (
        ("sriov_vf.config.vlan_id", vlan_id),
        ("sriov_vf.config.vlan_qos", vlan_qos),
        ("sriov_vf.config.tx_rate_min", tx_rate_min),
        ("sriov_vf.config.tx_rate_max", tx_rate_max),
        ("sriov_vf.config.link_state", link_state),
        ("sriov_vf.config.spoof_check", spoof_check),
        ("sriov_vf.config.trust", trust),
        ("sriov_vf.config.query_rss", query_rss),
    )
    changes = [
        (("node.sriov.vf", node, config_key, vf), value)
        for config_key, value in requested
        if value is not None
    ]

    if not changes:
        return False, "ERROR: No changes to apply."

    if zkhandler.write(changes):
        outcome = 'Successfully modified configuration of SR-IOV VF "{}" on node "{}".'
        return True, outcome.format(vf, node)

    outcome = 'Failed to modify configuration of SR-IOV VF "{}" on node "{}".'
    return False, outcome.format(vf, node)
示例#13
0
def move_vm(zkhandler, domain, target_node, wait=False, force_live=False):
    """
    Permanently move the VM `domain` to `target_node`.

    If `target_node` is falsy, a target is chosen with common.findTargetNode().
    An explicitly given target is instead validated: it must be a valid node,
    must be in the VM's node limit (when one is set), and must differ from the
    VM's current node. When the explicit target equals the current node and the
    VM has a last_node recorded, that last_node is cleared and the call
    succeeds ("making temporary migration permanent").

    A VM in state "start" is written as "migrate", or "migrate-live" when
    `force_live` is set; any other state is written back unchanged. With `wait`
    set, the call polls until the VM's state differs from the state it wrote.

    Returns a (success, message) tuple.
    """
    # Validate that VM exists in cluster
    # NOTE(review): `domain` is presumably a VM name or UUID that getDomainUUID
    # resolves - confirm against that helper.
    dom_uuid = getDomainUUID(zkhandler, domain)
    if not dom_uuid:
        return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain)

    # Get state and verify we're OK to proceed
    current_state = zkhandler.read(("domain.state", dom_uuid))
    if current_state != "start":
        # If the current state isn't start, preserve it; we're not doing live migration
        target_state = current_state
    else:
        if force_live:
            target_state = "migrate-live"
        else:
            target_state = "migrate"

    current_node = zkhandler.read(("domain.node", dom_uuid))

    if not target_node:
        # No explicit target: let the cluster helper pick one. None of the
        # checks in the else branch below are applied to this choice.
        target_node = common.findTargetNode(zkhandler, dom_uuid)
    else:
        # Verify node is valid
        valid_node = common.verifyNode(zkhandler, target_node)
        if not valid_node:
            return False, 'ERROR: Specified node "{}" is invalid.'.format(target_node)

        # Check if node is within the limit
        # The limit is stored as a comma-separated string; empty means no limit
        node_limit = zkhandler.read(("domain.meta.node_limit", dom_uuid))
        if node_limit and target_node not in node_limit.split(","):
            return (
                False,
                'ERROR: Specified node "{}" is not in the allowed list of nodes for VM "{}".'.format(
                    target_node, domain
                ),
            )

        # Verify if node is current node
        if target_node == current_node:
            # A recorded last_node is cleared here, which turns an earlier
            # temporary migration into a permanent placement.
            last_node = zkhandler.read(("domain.last_node", dom_uuid))
            if last_node:
                zkhandler.write([(("domain.last_node", dom_uuid), "")])
                return True, 'Making temporary migration permanent for VM "{}".'.format(
                    domain
                )

            return False, 'ERROR: VM "{}" is already running on node "{}".'.format(
                domain, current_node
            )

    # Reached with a falsy target only when findTargetNode found nothing
    if not target_node:
        return (
            False,
            'ERROR: Could not find a valid migration target for VM "{}".'.format(
                domain
            ),
        )

    retmsg = 'Permanently migrating VM "{}" to node "{}".'.format(domain, target_node)

    # State, node and last_node are written together while holding the
    # exclusive lock on the VM's state key; the lock is kept for a further
    # half second after the write.
    lock = zkhandler.exclusivelock(("domain.state", dom_uuid))
    with lock:
        zkhandler.write(
            [
                (("domain.state", dom_uuid), target_state),
                (("domain.node", dom_uuid), target_node),
                (("domain.last_node", dom_uuid), ""),
            ]
        )

        # Wait for 1/2 second for migration to start
        time.sleep(0.5)

    # Update any SR-IOV NICs
    update_vm_sriov_nics(zkhandler, dom_uuid, current_node, target_node)

    if wait:
        # Poll until the state key no longer holds the value written above
        while zkhandler.read(("domain.state", dom_uuid)) == target_state:
            time.sleep(0.5)
        retmsg = 'Permanently migrated VM "{}" to node "{}"'.format(domain, target_node)

    return True, retmsg
示例#14
0
def define_vm(
    zkhandler,
    config_data,
    target_node,
    node_limit,
    node_selector,
    node_autostart,
    migration_method=None,
    profile=None,
    tags=None,
    initial_state="stop",
):
    """
    Define a new VM in the cluster database from the XML in `config_data`.

    The VM's UUID and name are taken from the XML and must both be unused. If
    `target_node` is falsy, a node is chosen with common.findTargetNode();
    otherwise the given node is validated. The VM's memory and vCPU counts are
    checked against the target node, and any SR-IOV ("direct" or "hostdev")
    network in the XML must refer to a VF on the target node that is not marked
    as used.

    `node_limit` is stored as a comma-joined string when it is a non-empty
    list, and as an empty string otherwise. `tags` is an iterable of dicts with
    "name", "type" and "protected" keys; None (the default) means no tags.

    Returns a (success, message) tuple.
    """
    # A None default avoids sharing one mutable list object between calls
    if tags is None:
        tags = []

    # Parse the XML data
    try:
        parsed_xml = lxml.objectify.fromstring(config_data)
    except Exception:
        return False, "ERROR: Failed to parse XML data."
    dom_uuid = parsed_xml.uuid.text
    dom_name = parsed_xml.name.text

    # Ensure that the UUID and name are unique
    if searchClusterByUUID(zkhandler, dom_uuid) or searchClusterByName(
        zkhandler, dom_name
    ):
        return (
            False,
            'ERROR: Specified VM "{}" or UUID "{}" matches an existing VM on the cluster'.format(
                dom_name, dom_uuid
            ),
        )

    if not target_node:
        target_node = common.findTargetNode(zkhandler, dom_uuid)
        # Without this guard the node reads below would be keyed on a missing
        # node and fail inside int()
        if not target_node:
            return (
                False,
                'ERROR: Could not find a valid target node for VM "{}".'.format(
                    dom_name
                ),
            )
    else:
        # Verify node is valid
        valid_node = common.verifyNode(zkhandler, target_node)
        if not valid_node:
            return False, 'ERROR: Specified node "{}" is invalid.'.format(target_node)

    # Validate the new RAM against the current active node
    node_total_memory = int(zkhandler.read(("node.memory.total", target_node)))
    if int(parsed_xml.memory.text) >= node_total_memory:
        return (
            False,
            'ERROR: VM configuration specifies more memory ({} MiB) than node "{}" has available ({} MiB).'.format(
                parsed_xml.memory.text, target_node, node_total_memory
            ),
        )

    # Validate the number of vCPUs against the current active node
    node_total_cpus = int(zkhandler.read(("node.data.static", target_node)).split()[0])
    if (node_total_cpus - 2) <= int(parsed_xml.vcpu.text):
        return (
            False,
            'ERROR: VM configuration specifies more vCPUs ({}) than node "{}" has available ({} minus 2).'.format(
                parsed_xml.vcpu.text, target_node, node_total_cpus
            ),
        )

    # Collect the SR-IOV networks and make sure none of their VFs is in use.
    # The VFs are looked up on target_node: the domain is not in the database
    # yet (uniqueness was enforced above), so its "domain.node" key cannot be
    # read at this point.
    dnetworks = common.getDomainNetworks(parsed_xml, {})
    sriov_networks = [
        network for network in dnetworks if network["type"] in ["direct", "hostdev"]
    ]
    for network in sriov_networks:
        is_used = zkhandler.read(
            ("node.sriov.vf", target_node, "sriov_vf.used", network["source"])
        )
        if is_used == "True":
            used_by_name = searchClusterByUUID(
                zkhandler,
                zkhandler.read(
                    (
                        "node.sriov.vf",
                        target_node,
                        "sriov_vf.used_by",
                        network["source"],
                    )
                ),
            )
            return (
                False,
                'ERROR: Attempted to use SR-IOV network "{}" which is already used by VM "{}" on node "{}".'.format(
                    network["source"], used_by_name, target_node
                ),
            )

    # Obtain the RBD disk list using the common functions
    ddisks = common.getDomainDisks(parsed_xml, {})
    formatted_rbd_list = ",".join(
        disk["name"] for disk in ddisks if disk["type"] == "rbd"
    )

    # Join the limit
    if isinstance(node_limit, list) and node_limit:
        formatted_node_limit = ",".join(node_limit)
    else:
        formatted_node_limit = ""

    # Add the new domain to Zookeeper
    zkhandler.write(
        [
            (("domain", dom_uuid), dom_name),
            (("domain.xml", dom_uuid), config_data),
            (("domain.state", dom_uuid), initial_state),
            (("domain.profile", dom_uuid), profile),
            (("domain.stats", dom_uuid), ""),
            (("domain.node", dom_uuid), target_node),
            (("domain.last_node", dom_uuid), ""),
            (("domain.failed_reason", dom_uuid), ""),
            (("domain.storage.volumes", dom_uuid), formatted_rbd_list),
            (("domain.console.log", dom_uuid), ""),
            (("domain.console.vnc", dom_uuid), ""),
            (("domain.meta.autostart", dom_uuid), node_autostart),
            (("domain.meta.migrate_method", dom_uuid), migration_method),
            (("domain.meta.node_limit", dom_uuid), formatted_node_limit),
            (("domain.meta.node_selector", dom_uuid), node_selector),
            (("domain.meta.tags", dom_uuid), ""),
            (("domain.migrate.sync_lock", dom_uuid), ""),
        ]
    )

    # Claim the SR-IOV VFs only after the domain has been written. For a
    # "hostdev" VF, set_sriov_vf_vm sets the VM's migrate method to "shutdown",
    # and doing this earlier would let the write above overwrite that value.
    # As before, the return value of set_sriov_vf_vm is not checked.
    for network in sriov_networks:
        set_sriov_vf_vm(
            zkhandler,
            dom_uuid,
            target_node,
            network["source"],
            network["mac"],
            network["type"],
        )

    for tag in tags:
        tag_name = tag["name"]
        zkhandler.write(
            [
                (("domain.meta.tags", dom_uuid, "tag.name", tag_name), tag["name"]),
                (("domain.meta.tags", dom_uuid, "tag.type", tag_name), tag["type"]),
                (
                    ("domain.meta.tags", dom_uuid, "tag.protected", tag_name),
                    tag["protected"],
                ),
            ]
        )

    return True, 'Added new VM with Name "{}" and UUID "{}" to database.'.format(
        dom_name, dom_uuid
    )
示例#15
0
def get_list(zkhandler, node, state, tag, limit, is_fuzzy=True, negate=False):
    """
    List information for the VMs that match all of the given filters.

    Filters left falsy are not applied:
      * node:  the VM's node must equal `node`
      * state: the VM's state must equal `state` (one of a fixed set of names)
      * tag:   `tag` must be among the VM's tags
      * limit: a UUID or a regular expression. A UUID (any version) is
               normalised and compared against both the VM's UUID and its name;
               anything else is matched against the VM's name only. With
               `is_fuzzy`, a non-UUID limit is padded with ".*" at the start
               unless it begins with "^", and at the end unless it contains "$".

    With `negate` set, the node, state and tag tests are inverted; the limit
    test is not.

    Returns (True, <list of VM information>) on success, or
    (False, <error message>) for an invalid node, state or regular expression.
    """
    negate = bool(negate)

    # Verify node is valid
    if node and not common.verifyNode(zkhandler, node):
        return False, 'Specified node "{}" is invalid.'.format(node)

    if state:
        valid_states = [
            "start",
            "restart",
            "shutdown",
            "stop",
            "disable",
            "fail",
            "migrate",
            "unmigrate",
            "provision",
        ]
        if state not in valid_states:
            return False, 'VM state "{}" is not valid.'.format(state)

    full_vm_list = zkhandler.children("base.domain")

    # Set our limit to a sensible regex
    limit_pattern = None
    is_limit_uuid = False
    if limit:
        # Check if the limit is a UUID. No `version` argument is passed because
        # UUID(..., version=4) overwrites the version and variant bits, turning
        # any non-v4 UUID into a different UUID that would never match.
        try:
            limit = str(UUID(limit))
            is_limit_uuid = True
        except ValueError:
            pass

        if is_fuzzy and not is_limit_uuid:
            # Implicitly assume fuzzy limits
            if not re.match(r"\^.*", limit):
                limit = ".*" + limit
            if not re.match(r".*\$", limit):
                limit = limit + ".*"

        # Compile once rather than per VM, so an invalid expression is also
        # reported when the VM list is empty.
        try:
            limit_pattern = re.compile(limit)
        except re.error as e:
            return False, "Regex Error: {}".format(e)

    # Apply the filters, cheapest first, skipping a VM at its first failed test
    vm_execute_list = []
    for vm in full_vm_list:
        name = zkhandler.read(("domain", vm))

        # Check on limit: try the UUID (if applicable) and the name
        if limit_pattern is not None:
            uuid_hit = is_limit_uuid and limit_pattern.fullmatch(vm)
            # A VM whose name could not be read simply does not match by name
            name_hit = name is not None and limit_pattern.fullmatch(name)
            if not (uuid_hit or name_hit):
                continue

        # Check on tag
        if tag:
            vm_tags = zkhandler.children(("domain.meta.tags", vm))
            if (tag in vm_tags) == negate:
                continue

        # Check on node
        if node:
            vm_node = zkhandler.read(("domain.node", vm))
            if (vm_node == node) == negate:
                continue

        # Check on state
        if state:
            vm_state = zkhandler.read(("domain.state", vm))
            if (vm_state == state) == negate:
                continue

        vm_execute_list.append(vm)

    # Obtain our VM data in a thread pool
    # This helps parallelize the numerous Zookeeper calls a bit, within the bounds of the GIL, and
    # should help prevent this task from becoming absurdly slow with very large numbers of VMs.
    # The max_workers is capped at 32 to avoid creating an absurd number of threads especially if
    # the list gets called multiple times simultaneously by the API, but still provides a noticeable
    # speedup.
    vm_data_list = []
    with ThreadPoolExecutor(max_workers=32, thread_name_prefix="vm_list") as executor:
        futures = [
            executor.submit(common.getInformationFromXML, zkhandler, vm_uuid)
            for vm_uuid in vm_execute_list
        ]
        for future in futures:
            try:
                vm_data_list.append(future.result())
            except Exception:
                # Best effort: a VM whose information cannot be gathered is
                # left out of the result rather than failing the whole listing
                continue

    return True, vm_data_list