Пример #1
0
    def __init__(self, cluster_metadata):
        """Validate the AHV/Nutanix/Prism metadata and set up empty caches.

        Args:
          cluster_metadata: Cluster metadata proto. It must carry 'ahv_info',
            'nutanix_info' and 'prism_info' (CHECKed). Its
            'nutanix_info.prism_host' is overwritten with the host found in
            'prism_info' before the base class is initialized.
        """
        hypervisor_info = cluster_metadata.cluster_hypervisor_info
        software_info = cluster_metadata.cluster_software_info
        mgmt_server_info = cluster_metadata.cluster_management_server_info

        # TODO (jklein): Would be nice to standardize this in a cleaner way.
        CHECK(hypervisor_info.HasField("ahv_info"))
        CHECK(software_info.HasField("nutanix_info"))
        CHECK(mgmt_server_info.HasField("prism_info"))

        # Prism information for the PE/PC server that manages this cluster.
        self._mgmt_server_info = mgmt_server_info.prism_info

        # Point the Nutanix software info at that same Prism host.
        software_info.nutanix_info.prism_host = \
            self._mgmt_server_info.prism_host

        # VM UUID -> UUID of the host on which that VM should be scheduled.
        self.__vm_uuid_host_uuid_map = {}

        # ID caches; all start out empty.
        self.__cluster_id = None
        self.__container_id = None
        self.__network_id = None
        self.__host_ip_cvm_ip_map = None

        super(AcropolisCluster, self).__init__(cluster_metadata)
Пример #2
0
 def __init__(self, vm_params):
     """Copy the VM description out of 'vm_params'.

     Args:
       vm_params: Parameter object providing 'cluster', 'vm_id', 'vm_name',
         'vm_ip', 'node_id' and 'is_cvm'. 'cluster' and 'vm_id' must be
         truthy (CHECKed).
     """
     # The two mandatory fields are validated before anything is stored.
     cluster, vm_id = vm_params.cluster, vm_params.vm_id
     CHECK(cluster)
     CHECK(vm_id)
     # See the comments in VmParams for these fields.
     self._cluster = cluster
     self._vm_id = vm_id
     self._vm_name = vm_params.vm_name
     self._vm_ip = vm_params.vm_ip
     self._node_id = vm_params.node_id
     self._is_cvm = vm_params.is_cvm
Пример #3
0
 def __produce_curie_metrics(self, stats_specific_responses, node):
     """Convert raw stats responses into populated CurieMetric protos.

     Args:
       stats_specific_responses: Iterable of dicts; each is read through its
         "metric", "startTimeInUsecs", "intervalInSecs" and "values" keys.
       node: Node whose 'cpu_capacity_in_hz' scales CPU usage into megahertz.

     Returns:
       List holding one CurieMetric per entry of self.metrics(), each a copy
       of that entry with 'timestamps' and 'values' filled in. Samples whose
       value is None are dropped together with their timestamp.
     """
     raw_by_counter = {
         raw["metric"]: raw for raw in stats_specific_responses
     }
     produced = []
     for template in self.metrics():
         raw = raw_by_counter[self._curie_metric_to_metric_name(template)]
         first_sample_secs = int(raw["startTimeInUsecs"] / 1e6)
         step_secs = int(raw["intervalInSecs"])
         # Sample number i is stamped i intervals after the start time; a
         # sample without a value is discarded along with its timestamp.
         kept_samples = [
             (first_sample_secs + position * step_secs, sample)
             for position, sample in enumerate(raw["values"])
             if sample is not None
         ]
         if kept_samples:
             timestamps, values = zip(*kept_samples)
         else:
             timestamps, values = [], []
         result = CurieMetric()
         result.CopyFrom(template)
         # TODO(ryan.hardin): Generalize unit conversion, move to utility module.
         if result.rate == CurieMetric.kPerSecond:
             # Convert units per interval into units per second.
             seconds_per_sample = float(step_secs)
             values = [sample / seconds_per_sample for sample in values]
         unit = result.unit
         if unit == CurieMetric.kPercent:
             # Assume metric in ppm (parts per million) - convert to percentage.
             values = [sample / 1e4 for sample in values]
         elif unit == CurieMetric.kKilobytes:
             # Assume metric in bytes - convert to kilobytes.
             values = [sample / 1024.0 for sample in values]
         elif (unit == CurieMetric.kMegahertz
               and result.name == CurieMetric.kCpuUsage):
             # Assume metric in ppm (parts per million) - convert total megahertz.
             # TODO(ryan.hardin): Should node.cpu_capacity_in_hz ever return None?
             if node.cpu_capacity_in_hz is None:
                 log.debug("node.cpu_capacity_in_hz returned None")
                 # Without a capacity nothing can be reported for this metric.
                 timestamps, values = [], []
             else:
                 values = [(cpu_ppm * node.cpu_capacity_in_hz / 1e12)
                           for cpu_ppm in values]
         # The copy is expected to arrive without samples of its own.
         CHECK(len(result.timestamps) == 0)
         result.timestamps.extend(timestamps)
         CHECK(len(result.values) == 0)
         result.values.extend([int(sample) for sample in values])
         produced.append(result)
     return produced
Пример #4
0
def curie_error_to_http_status(error_code):
    """
    Map the curie error code 'error_code' to its HTTP status code.

    'error_code' must be a key of ERROR_CODE_HTTP_STATUS_MAP (CHECKed, with
    the offending code as the message).
    """
    is_known_code = error_code in ERROR_CODE_HTTP_STATUS_MAP
    CHECK(is_known_code, msg=error_code)
    http_status = ERROR_CODE_HTTP_STATUS_MAP[error_code]
    return http_status
Пример #5
0
 def initialize(self):
     """Initialize agent state.

     Calls self.__setup() when FLAGS.curie_agent_dir does not exist yet,
     deletes command directories lacking 'arg.bin' or 'status.bin', recreates
     an empty garbage directory, recovers the command map, marks the agent
     as initialized and finally starts the command poller thread.

     The agent must not have been initialized before (CHECKed).
     """
     CHECK(not self.__initialized)
     # Create initial directory structure and empty settings file if needed.
     agent_dir_exists = os.path.exists(FLAGS.curie_agent_dir)
     if not agent_dir_exists:
         self.__setup()
     # A command directory missing either of these files is incomplete and
     # gets deleted.
     required_files = ("arg.bin", "status.bin")
     cmds_root = self.cmds_dir()
     for entry in os.listdir(cmds_root):
         entry_dir = os.path.join(cmds_root, entry)
         is_complete = all(
             os.path.exists(os.path.join(entry_dir, required))
             for required in required_files)
         if not is_complete:
             log.warning("Deleting incomplete command directory %s",
                         entry_dir)
             shutil.rmtree(entry_dir)
     # Delete old garbage and create an empty garbage directory.
     garbage_dir = self.__garbage_dir
     if os.path.exists(garbage_dir):
         shutil.rmtree(garbage_dir)
     os.mkdir(garbage_dir)
     # Recover self.__cmd_map based on state on disk and running commands.
     self.__recover_cmd_map()
     # Mark agent as being initialized.
     self.__initialized = True
     # Start thread to perform periodic updates of command state and GC.
     poller = threading.Thread(target=self.__cmd_poller_thread_func)
     self.__cmd_poller_thread = poller
     poller.start()
Пример #6
0
 def import_vm(self,
               goldimages_directory,
               goldimage_name,
               vm_name,
               node_id=None):
     """
     Creates the VM 'vm_name' from the gold image 'goldimage_name' found in
     'goldimages_directory' and returns it as a curie VM.

     'node_id' is handed to vCenter as the host name. Per the original
     documentation, the VM is created on that node when it is given and on a
     random node otherwise, using the datastore associated with the curie
     server's settings for this cluster.

     If the vCenter info names a network, that network must exist in the
     datacenter (CHECKed); otherwise no network is passed along.
     """
     with self._open_vcenter_connection() as vcenter:
         vim_datacenter, vim_cluster, vim_datastore = \
           self._lookup_vim_objects(vcenter)
         vim_network = None
         if self._vcenter_info.HasField("vcenter_network_name"):
             wanted_network = self._vcenter_info.vcenter_network_name
             # First datacenter network carrying the configured name, if any.
             vim_network = next(
                 (candidate for candidate in vim_datacenter.network
                  if candidate.name == wanted_network),
                 None)
             CHECK(vim_network is not None, msg=wanted_network)
         # We use the HostSystem (node) name as a node ID for vSphere clusters.
         imported_vim_vm = vcenter.import_vm(goldimages_directory,
                                             goldimage_name,
                                             vim_datacenter,
                                             vim_cluster,
                                             vim_datastore,
                                             vim_network,
                                             vm_name,
                                             host_name=node_id)
         return self.__vim_vm_to_curie_vm(imported_vim_vm)
Пример #7
0
    def check_cmd(self, cmd_id, desired_state=CmdStatus.kSucceeded):
        """Check whether a command has reached a desired state.

        Args:
          cmd_id (str): ID of the command to check.
          desired_state (CmdStatus): State the command is expected to reach.

        Returns:
          The CmdStatus protobuf once the command is in 'desired_state', or
          None while the command is still running.

        Raises:
          CurieTestException if the command is neither running nor in
            'desired_state', i.e. it ended in a different terminal state.
        """
        rpc_client = AgentRpcClient(self._vm_ip)
        status_arg = charon_agent_interface_pb2.CmdStatusArg()
        status_arg.cmd_id = cmd_id
        # NOTE(review): the error half of the RPC result is never inspected
        # here - confirm that CmdStatus() raises on failure.
        status_ret, _ = rpc_client.CmdStatus(status_arg)
        current_state = status_ret.cmd_status.state
        if current_state == desired_state:
            # The desired state may be a terminal or a non-terminal one.
            cmd_status = status_ret.cmd_status
            CHECK(cmd_status)
            return cmd_status
        if current_state == CmdStatus.kRunning:
            # Still in progress; nothing to report yet.
            return None
        # A terminal state that is not the desired one - had it matched, the
        # first branch would already have returned.
        CHECK_NE(current_state, desired_state)
        state_names = (cmd_id, CmdStatus.Type.Name(current_state),
                       CmdStatus.Type.Name(desired_state))
        raise CurieTestException(
            "Command %s terminal state %s != desired state %s" % state_names)
Пример #8
0
    def snapshot_vms(self,
                     vms,
                     snapshot_names,
                     snapshot_descriptions=(),
                     max_parallel_tasks=None):
        """
        Creates one snapshot per VM: 'vms[xx]' gets a snapshot named
        'snapshot_names[xx]' with the optional description
        'snapshot_descriptions[xx]'.

        Args:
          vms (list of CurieVMs): VMs to create snapshots for; they are
            identified towards vCenter by their vm_name().
          snapshot_names (list of strings): Snapshot names; must have the same
            length as 'vms' (CHECKed).
          snapshot_descriptions (list of strings): Optional descriptions. If
            non-empty it must have the same length as 'snapshot_names'
            (CHECKed).
          max_parallel_tasks (int): Parallelism limit, resolved through
            self._get_max_parallel_tasks(). Per the original documentation
            the default is FLAGS.vsphere_vcenter_max_parallel_tasks.
        """
        max_parallel_tasks = self._get_max_parallel_tasks(max_parallel_tasks)
        num_snapshots = len(snapshot_names)
        CHECK_EQ(len(vms), num_snapshots)
        # Descriptions are all-or-nothing: none at all, or one per snapshot.
        CHECK(len(snapshot_descriptions) in (0, num_snapshots))
        with self._open_vcenter_connection() as vcenter:
            vim_cluster = self._lookup_vim_cluster(vcenter)
            vm_names = [vm.vm_name() for vm in vms]
            vcenter.snapshot_vms(vim_cluster,
                                 vm_names,
                                 snapshot_names,
                                 snapshot_descriptions=snapshot_descriptions,
                                 max_parallel_tasks=max_parallel_tasks)
Пример #9
0
    def __init__(self, cluster_metadata):
        """Initialize the base cluster and cache the vCenter identifiers.

        Args:
          cluster_metadata: Cluster metadata proto; it must carry 'esx_info'
            and 'vcenter_info' (CHECKed after the base class is initialized).
        """
        super(VsphereCluster, self).__init__(cluster_metadata)
        hypervisor_info = cluster_metadata.cluster_hypervisor_info
        mgmt_server_info = cluster_metadata.cluster_management_server_info
        CHECK(hypervisor_info.HasField("esx_info"))
        CHECK(mgmt_server_info.HasField("vcenter_info"))

        # vCenter information for the vCenter server that manages this
        # cluster, taken from the metadata held by this object.
        vcenter_info = \
          self._metadata.cluster_management_server_info.vcenter_info
        self._vcenter_info = vcenter_info

        # Names of the datacenter the cluster lives in, of the cluster to run
        # on, and of the datastore to deploy test VMs on.
        self.__datacenter_name = vcenter_info.vcenter_datacenter_name
        self.__cluster_name = vcenter_info.vcenter_cluster_name
        self.__datastore_name = vcenter_info.vcenter_datastore_name
Пример #10
0
 def run(self):
     """Run the Flask server forever.

     The agent must already be marked as initialized (CHECKed). A warning is
     logged when the agent's UID is not 0.
     """
     CHECK(self.__initialized)
     running_as_root = self.__agent_uid == 0
     if not running_as_root:
         log.warning("curie_unix_agent not running as root")
     listen_port = FLAGS.curie_agent_port
     log.info("Running agent on TCP port %d", listen_port)
     flask_options = {
         "debug": FLAGS.curie_agent_flask_debug_enabled,
         "host": "0.0.0.0",
         "port": listen_port,
         "threaded": True,
     }
     self.__app.run(**flask_options)
Пример #11
0
    def _initialize(self, root_tree, xpath):
        """
        Records the root tree and the element xpath for this descriptor.

        Handles updates as appropriate after resolving changes to the tree due
        to class inheritance. A descriptor can be initialized only once
        (CHECKed).
        """
        already_initialized = self.__is_initialized
        CHECK(not already_initialized, "Cannot reinitialize this descriptor")
        self.__root_tree, self.__xpath = root_tree, xpath
        self.__is_initialized = True
Пример #12
0
 def cmd_execute_sync(self, arg, timeout_secs, include_output=False):
     """
     Given the CmdExecute request 'arg', simulate synchronous execution by
     sending a CmdExecute RPC to start the command, polling once per second
     until the command reaches a terminal state, then returning its result.

     Args:
       arg: CmdExecute request; its 'cmd_id' is reused for status polling.
       timeout_secs: Maximum time to wait, measured from just before the
         CmdExecute RPC is sent.
       include_output (bool): Whether stdout and stderr should be requested.

     Returns:
       (exit_status, stdout, stderr) tuple for the command. stdout/stderr are
       None when the status response does not carry them.

     Raises:
       CurieException: with the first reported error code/message if either
         RPC returns an error, with CurieError.kTimeout if the command is
         still running after 'timeout_secs', or with
         CurieError.kInternalError if the final status has no exit status.
     """
     start_secs = time.time()
     # Send a CmdExecute RPC to start the command.
     _, execute_err = self.CmdExecute(arg)
     if execute_err is not None:
         raise CurieException(execute_err.error_codes[0],
                              execute_err.error_msgs[0])
     status_arg = charon_agent_interface_pb2.CmdStatusArg()
     status_arg.cmd_id = arg.cmd_id
     status_arg.include_output = include_output
     status_ret = None
     # Poll until the command reaches a terminal state or it times out.
     while True:
         status_ret, status_err = self.CmdStatus(status_arg)
         if status_err is not None:
             raise CurieException(status_err.error_codes[0],
                                  status_err.error_msgs[0])
         if (status_ret.cmd_status.state !=
                 charon_agent_interface_pb2.CmdStatus.kRunning):
             # Command is in a terminal state.
             break
         # The timeout is only evaluated after a poll, so at least one status
         # request is always made.
         if (time.time() - start_secs) > timeout_secs:
             raise CurieException(
                 CurieError.kTimeout,
                 "Timeout waiting for command %s" % arg.cmd_id)
         time.sleep(1)
     # Check that we have the exit status for the command.
     CHECK(status_ret is not None)
     if not status_ret.cmd_status.HasField("exit_status"):
         raise CurieException(
             CurieError.kInternalError,
             "Missing exit status for command %s" % arg.cmd_id)
     # Return an (exit_status, stdout, stderr) tuple for the command.
     exit_status = status_ret.cmd_status.exit_status
     stdout = status_ret.stdout if status_ret.HasField("stdout") else None
     stderr = status_ret.stderr if status_ret.HasField("stderr") else None
     return (exit_status, stdout, stderr)
Пример #13
0
    def __init__(self, node, rest_api_timeout_secs=60):
        """Bind this util to 'node' and build its Nutanix REST API client.

        Args:
          node: Node object for which this util is used. The metadata of its
            cluster must carry 'nutanix_info' (CHECKed).
          rest_api_timeout_secs: Forwarded to the REST client as
            'timeout_secs'.
        """
        # Node object for which this util is used.
        self.__node = node

        # Full cluster metadata proto.
        self.__cluster_metadata = node.cluster().metadata()

        cluster_software = self.__cluster_metadata.cluster_software_info
        CHECK(cluster_software.HasField("nutanix_info"))

        # NutanixRestApiClient instance to use.
        self.__api = NutanixRestApiClient.from_proto(
            cluster_software.nutanix_info,
            timeout_secs=rest_api_timeout_secs)
Пример #14
0
    def __maybe_finalize_cmd_state(self, cmd_state, exit_status=None):
        """
        Finalize the command state in memory/disk if the command isn't already
        in a terminal state.

        Assumes self.__lock is held.

        Args:
          cmd_state: In-memory state of the command; its 'cmd_id' locates the
            on-disk 'status.bin', and its 'proc' and 'pid' are reset to None
            once the status has been finalized.
          exit_status: Exit status of the command. -1 indicates the command
            was stopped, 0 that it succeeded. Any other value, including -2
            for a non-normal exit (e.g. termination by a signal), is
            classified as failed. None marks the command state as unknown and
            records no exit status.
        """
        status_path = os.path.join(self.cmd_dir(cmd_state.cmd_id),
                                   "status.bin")
        cmd_status = CmdStatus()
        # Read the serialized proto as bytes, and close the file right away
        # instead of leaving the handle to the garbage collector.
        with open(status_path, "rb") as status_file:
            cmd_status.ParseFromString(status_file.read())
        if cmd_status.state != CmdStatus.kRunning:
            # Command may have already been stopped.
            return
        log.info("Finalizing status for command %s: exit_status %s",
                 cmd_state.cmd_id, exit_status)
        # A running command is expected to have a pid and no exit status yet.
        CHECK(cmd_status.HasField("pid"), msg=cmd_state.cmd_id)
        CHECK(not cmd_status.HasField("exit_status"), msg=cmd_state.cmd_id)
        if exit_status is not None:
            if exit_status == -1:
                cmd_status.state = CmdStatus.kStopped
            elif exit_status == 0:
                cmd_status.state = CmdStatus.kSucceeded
            else:
                cmd_status.state = CmdStatus.kFailed
            cmd_status.exit_status = exit_status
        else:
            cmd_status.state = CmdStatus.kUnknown
        cmd_status.ClearField("pid")
        # Persist the final status, then drop the in-memory process handles.
        status_data = cmd_status.SerializeToString()
        OsUtil.write_and_rename(status_path, status_data)
        cmd_state.proc = None
        cmd_state.pid = None
Пример #15
0
    def fetch_cmd_status(self, cmd_id):
        """Fetch the status of the command 'cmd_id' from the agent.

        Returns:
          (ret, err): where
          ret: CmdStatusRet filled out by rpc call to agent.
          err: None if no error or CurieError if there was an error.

        Raises:
          CurieException may be raised by rpc_client.CmdStatus().
        """
        status_request = charon_agent_interface_pb2.CmdStatusArg()
        status_request.cmd_id = cmd_id
        status_ret, status_err = self.CmdStatus(status_request)
        # NOTE(review): this CHECK runs before the error value is looked at -
        # confirm that the response is populated whenever an error is set.
        CHECK(status_ret.cmd_status)
        return (status_ret, status_err)
Пример #16
0
def _set_encryption_key(cls, key):
    """
    Associates 'key' with the top-level type descriptor of this message class.

    To prevent data loss, the key may currently be initialized only once.
    Setting the very same (non-empty) key again is a no-op.

    Args:
      key (str): Encryption key to use, assumed to be a SHA-256 digest.

    Raises:
      FATAL on attempting to overwrite an existing key with a new value.
    """
    existing_key = getattr(
        _get_top_level_type_desc(cls), "_ENCRYPTION_KEY", None)
    same_key_already_set = bool(existing_key) and existing_key == key
    if not same_key_already_set:
        CHECK(existing_key is None,
              msg="Encryption key has already been set %s %s" %
              (existing_key, key))
        setattr(_get_top_level_type_desc(cls), "_ENCRYPTION_KEY", key)
Пример #17
0
    def __init__(self, cluster, node_id, node_index):
        """Create the node and the management utilities attached to it.

        Args:
          cluster: Cluster this node is part of.
          node_id: Node ID; must be truthy (CHECKed). It need not be unique
            across all nodes managed by a given management server, but it
            must be unique within the set of nodes for the cluster.
          node_index: Index of this node within the node metadata vector;
            must be >= 0 (CHECKed).
        """
        CHECK(node_id)
        CHECK_GE(node_index, 0, "Node index must be a natural number")

        self._cluster = cluster
        self._node_id = node_id
        self._node_index = node_index

        # IP address currently associated with this node.
        self._node_ip = None

        # The management util is chosen from the cluster metadata and is
        # built around this node.
        make_node_util = get_node_management_util(cluster.metadata())
        self.__node_util = make_node_util(self)

        # The power management util is chosen from this node's own
        # out-of-band management info.
        oob_info = self.metadata().node_out_of_band_management_info
        self.power_management_util = get_power_management_util(oob_info)
Пример #18
0
    def check_nodes_ready(self, nodes, sync_with_oob=True):
        """
        Performs minimal checks to see if nodes are in a functioning state.

        Every node in 'nodes' must belong to this cluster (CHECKed). The nodes
        count as ready when self.get_unready_nodes() reports none of them.
        Subclasses may extend this method to provide additional cluster-level
        checks specific to a given cluster type.

        Args:
          nodes (list<Node>): List of nodes to check.
          sync_with_oob (bool): Optional; forwarded to get_unready_nodes().
            Per the original documentation, if True, host power states as
            reported by OOB and management software are made consistent
            before the subsequent checks.

        Returns:
          (bool) True if all nodes are ready, else False.
        """
        strangers = set(nodes).difference(self.nodes())
        stranger_ids = [node.node_id() for node in strangers]
        CHECK(not strangers,
              "%s are not members of %s" % (stranger_ids, self._name))
        unready_nodes = self.get_unready_nodes(nodes=nodes,
                                               sync_with_oob=sync_with_oob)
        return len(unready_nodes) == 0
Пример #19
0
    def update_metadata(self, include_reporting_fields):
        """Update the node metadata with the host information held by Prism.

        For every Prism host that belongs to this cluster and is known to the
        metadata, the node proto's 'id' is set to the host UUID.

        Args:
          include_reporting_fields (bool): If True, also fill in the per-node
            hardware fields and, when Prism reports one, the Nutanix version.
        """
        # (proto field on node_hardware, key in the Prism host JSON)
        hardware_fields = (
            ("num_cpu_packages", "numCpuSockets"),
            ("num_cpu_cores", "numCpuCores"),
            ("num_cpu_threads", "numCpuThreads"),
            ("cpu_hz", "cpuFrequencyInHz"),
            ("memory_size", "memoryCapacityInBytes"),
        )

        cluster_json = self.__lookup_cluster_json()

        self._node_id_metadata_map = dict(
            (node.id, node) for node in self._metadata.cluster_nodes)

        uuid_to_metadata_id = self.get_node_uuid_metadata_id_map()
        all_hosts = self._prism_client.hosts_get().get("entities", [])
        for host_json in all_hosts:
            # Hosts belonging to other clusters are ignored.
            if host_json["clusterUuid"] != cluster_json["clusterUuid"]:
                continue
            try:
                metadata_id = uuid_to_metadata_id[host_json["uuid"]]
            except KeyError:
                # If the node is missing in the metadata, skip it.
                continue
            node_proto = self._node_id_metadata_map.get(metadata_id)
            CHECK(node_proto)
            node_proto.id = host_json["uuid"]

            if include_reporting_fields:
                node_hw = node_proto.node_hardware
                for proto_field, json_key in hardware_fields:
                    setattr(node_hw, proto_field, host_json[json_key])

        if not include_reporting_fields:
            return
        # TODO (jklein): AHV info per-node.
        cluster_software_info = self._metadata.cluster_software_info
        nutanix_version = self._prism_client.get_nutanix_metadata().version
        if nutanix_version is not None:
            cluster_software_info.nutanix_info.version = nutanix_version
0
    def prereq_runtime_vm_storage_is_ready(cluster):
        """
        Confirms that curie test VM storage on each node in 'cluster' is
        available.

        For ESX clusters, cluster.datastore_visible() must report the vCenter
        datastore as visible. On Nutanix software the datastore must also be
        listed by Prism for every host Prism reports datastores for. Hyper-V
        and AHV clusters are accepted without further checks.

        Returns:
          True for Hyper-V and AHV clusters, None for ESX clusters.

        Raises:
          CurieTestException if curie test VM storage is unavailable on any
            node.
          ValueError if the hypervisor or cluster software type is unknown.
        """
        metadata = cluster.metadata()
        hypervisor_info = metadata.cluster_hypervisor_info
        if not hypervisor_info.HasField("esx_info"):
            if hypervisor_info.HasField("hyperv_info"):
                # TODO (bferlic): More thorough checking here?
                return True
            if hypervisor_info.HasField("ahv_info"):
                # TODO (jklein): More thorough checking here?
                return True
            raise ValueError("Unknown hypervisor type, metadata %s" % metadata)

        # ESX cluster from here on.
        num_nodes = len(metadata.cluster_nodes)
        mgmt_server_info = metadata.cluster_management_server_info
        CHECK(mgmt_server_info.HasField("vcenter_info"))
        datastore_name = mgmt_server_info.vcenter_info.vcenter_datastore_name

        # Check that the datastore is visible on all nodes in vCenter.
        log.info(
            "Checking that datastore %s is visible on all %s nodes in vCenter",
            datastore_name, cluster.name())
        if not cluster.datastore_visible(datastore_name):
            raise CurieTestException(
                "Datastore %s not visible on all %s nodes in vCenter" %
                (datastore_name, cluster.name()))
        log.info("Datastore %s is visible on all %s nodes in vCenter",
                 datastore_name, cluster.name())

        software_info = metadata.cluster_software_info
        if not software_info.HasField("nutanix_info"):
            # VSAN and generic clusters need no check beyond vCenter's view.
            if (software_info.HasField("vsan_info")
                    or software_info.HasField("generic_info")):
                return
            raise ValueError("Unknown cluster software info, metadata %s" %
                             metadata)

        # On a Nutanix cluster, check that the datastore is also visible on
        # all nodes in Prism.
        log.info(
            "Checking that datastore %s is visible by Prism on all %s nodes",
            datastore_name, cluster.name())
        client = NutanixRestApiClient.from_proto(software_info.nutanix_info)
        datastores_by_host = {}
        for item in client.datastores_get():
            datastores_by_host.setdefault(item["hostId"], set()).add(
                item["datastoreName"])
        CHECK_LE(len(datastores_by_host), num_nodes)
        for host_id in datastores_by_host:
            if datastore_name not in datastores_by_host[host_id]:
                raise CurieTestException(
                    "Datastore %s not visible by Prism on %s node %s" %
                    (datastore_name, cluster.name(), host_id))
        log.info("Datastore %s is visible by Prism on all %s nodes",
                 datastore_name, cluster.name())
Пример #21
0
    def prereq_runtime_vm_storage_is_ready_fix(cluster):
        """
        Attempt to make curie test VM storage available on all nodes.

        Only clusters whose hypervisor info has 'esx_info' are handled; for
        any other hypervisor this function does nothing. The datastores are
        refreshed first. If the datastore is still not visible after up to 60
        seconds, a Nutanix cluster additionally gets the datastore unmounted
        and mounted again through Prism, followed by another refresh and
        another wait of up to 60 seconds.

        Raises:
          CurieTestException on error or timeout, and always for VSAN or
            generic clusters whose datastore did not become visible after the
            refresh.
          CurieException if unmounting the datastore fails with anything
            other than CurieError.kInvalidParameter.
          ValueError if the cluster software info is of an unknown kind.
        """
        metadata = cluster.metadata()
        if metadata.cluster_hypervisor_info.HasField("esx_info"):
            CHECK(
                metadata.cluster_management_server_info.HasField(
                    "vcenter_info"))
            vcenter_info = metadata.cluster_management_server_info.vcenter_info
            datastore_name = vcenter_info.vcenter_datastore_name

            def datastore_visible():
                # Predicate polled by CurieUtil.wait_for(): True once the
                # readiness check passes, None (falsy) while it still fails.
                try:
                    ScenarioUtil.prereq_runtime_vm_storage_is_ready(cluster)
                    return True
                except CurieTestException:
                    return None
            msg = "datastore %s visible on all %s nodes" % \
              (datastore_name, cluster.name())
            # Refresh datastores state on all nodes to try and make the datastore
            # visible from vCenter's perspective.
            log.info("Refreshing datastores on all %s nodes", cluster.name())
            cluster.refresh_datastores()
            if CurieUtil.wait_for(datastore_visible, msg, 60):
                return
            cluster_software_info = metadata.cluster_software_info
            if cluster_software_info.HasField("nutanix_info"):
                client = NutanixRestApiClient.from_proto(
                    cluster_software_info.nutanix_info)
                # Find the container backing the datastore, if it is mounted.
                container_name = None
                for item in client.datastores_get():
                    if item["datastoreName"] == datastore_name:
                        container_name = item["containerName"]
                        break
                if container_name is None:
                    log.warning(
                        "Datastore %s not mounted on any %s nodes, assuming "
                        "container name is the same as the desired datastore "
                        "name", datastore_name, cluster.name())
                    # Assume that the desired datastore has the same name as an existing
                    # container name.
                    container_name = datastore_name
                # Remount the datastore to try and make the datastore visible.
                log.info(
                    "Unmounting and mounting datastore %s (container %s) on %s",
                    datastore_name, container_name, cluster.name())
                try:
                    client.datastores_delete(datastore_name, verify=True)
                except CurieException as ex:
                    if ex.error_code != CurieError.kInvalidParameter:
                        raise
                    # If Prism views the datastore as unmounted, kInvalidParameter is
                    # returned so continue to try and mount the datastore on all nodes.
                client.datastores_create(container_name,
                                         datastore_name=datastore_name)
                cluster.refresh_datastores()
                if not CurieUtil.wait_for(datastore_visible, msg, 60):
                    raise CurieTestException(
                        "Timeout waiting for datastore %s for "
                        "VM storage to become visible on %s" %
                        (datastore_name, cluster.name()))
            elif cluster_software_info.HasField("vsan_info"):
                raise CurieTestException(
                    "VSAN datastore %s not mounted on all %s "
                    "nodes" % (datastore_name, cluster.name()))
            elif cluster_software_info.HasField("generic_info"):
                raise CurieTestException(
                    "Datastore %s not mounted on all %s nodes" %
                    (datastore_name, cluster.name()))
            else:
                raise ValueError("Unknown cluster software info, metadata %s" %
                                 metadata)
Пример #22
0
    def __init__(self, name=__name__):
        """Set up the Flask app, the RPC server and the in-memory agent state.

        Also registers the web route, the RPC endpoint and one RPC handler
        per supported RPC. No thread is created here: both thread attributes
        start out as None.

        Args:
          name: Name handed to flask.Flask(); defaults to this module's
            __name__.
        """
        CHECK(FLAGS.curie_agent_dir, "--curie_agent_dir must be set")

        # If requested, monkey-patch werkzeug to suppress Flask logspam.
        if FLAGS.curie_agent_flask_suppress_werkzeug_logs:
            log.debug(
                "--curie_agent_flask_suppress_werkzeug_logs is set. Monkey-patching "
                "'werkzeug._internal._log' to suppress unwanted output")
            CurieUtil.monkey_patch_werkzeug_logger()

        # Flask application.
        self.__app = flask.Flask(name)

        # RPC server.
        self.__rpc_server = RpcServer(
            charon_agent_interface_pb2.CurieAgentRpcSvc)

        # UID of the user running the agent.
        self.__agent_uid = os.getuid()

        # Garbage directory, located directly under the agent directory.
        self.__garbage_dir = os.path.join(FLAGS.curie_agent_dir, "garbage")

        # Whether we're done with initialization or not.
        self.__initialized = False

        # Thread that periodically updates the status of commands in
        # self.__cmd_map.
        self.__cmd_poller_thread = None

        # Thread that periodically garbage collects state for commands that can be
        # removed.
        self.__cmd_gc_thread = None

        # Lock that protects the fields below and all non stdout/stderr state for
        # all commands on disk.
        self.__lock = threading.Lock()

        # Mapping from command ID to command state.
        self.__cmd_map = {}

        #--------------------------------------------------------------------------
        #
        # Handlers for web pages.
        #
        #--------------------------------------------------------------------------

        # The index page answers with an empty body.
        @self.__app.route("/")
        def web_index():
            return ""

        #--------------------------------------------------------------------------
        #
        # RPC endpoint.
        #
        #--------------------------------------------------------------------------

        # POST-only route; the function hands the current Flask request to the
        # RPC server's endpoint decorator.
        @self.__app.route("/rpc", methods=["POST"])
        @self.__rpc_server.endpoint
        def rpc_endpoint():
            return flask.request

        #--------------------------------------------------------------------------
        #
        # RPC handlers.
        #
        #--------------------------------------------------------------------------

        # Each handler below is a thin closure delegating to the private
        # method of the same purpose on this agent instance.

        @self.__rpc_server.handler("CmdExecute")
        @curie_unix_agent_api_handler
        def api_cmd_execute(arg):
            return self.__api_cmd_execute(arg)

        @self.__rpc_server.handler("CmdStatus")
        @curie_unix_agent_api_handler
        def api_cmd_status(arg):
            return self.__api_cmd_status(arg)

        @self.__rpc_server.handler("CmdStop")
        @curie_unix_agent_api_handler
        def api_cmd_stop(arg):
            return self.__api_cmd_stop(arg)

        @self.__rpc_server.handler("CmdRemove")
        @curie_unix_agent_api_handler
        def api_cmd_remove(arg):
            return self.__api_cmd_remove(arg)

        @self.__rpc_server.handler("CmdList")
        @curie_unix_agent_api_handler
        def api_cmd_list(arg):
            return self.__api_cmd_list(arg)

        @self.__rpc_server.handler("FileGet")
        @curie_unix_agent_api_handler
        def api_file_get(arg):
            return self.__api_file_get(arg)