示例#1
0
    def makeCluster(self, rack_controller, *commands):
        """Create a stub cluster connection for `commands` and register it.

        See `make_amp_protocol_factory` for details.

        If the ``Authenticate`` call is not amongst `commands` it is
        appended. Its action is to call ``RegisterRackController`` so the
        connection is fully made and the connection will be added to the
        RPC service's list of connections.

        :param rack_controller: The rack controller the stub connection
            belongs to; its ``system_id`` is used to look up the client.
        :param commands: The RPC commands the stub protocol responds to.
        :return: The protocol instance created.
        """
        if cluster.Authenticate not in commands:
            commands += (cluster.Authenticate,)
        protocol = make_amp_protocol_factory(*commands)()

        def authenticate(_, message):
            # Answer the authentication challenge with this fixture's secret.
            return authenticate_with_secret(self.secret, message)

        protocol.Authenticate.side_effect = authenticate
        self.addCluster(protocol, rack_controller).wait(5)
        # A short handshake happens immediately after the connection is
        # established; asking for the client here waits for that handshake
        # to complete before the protocol is handed back.
        getClientFor(rack_controller.system_id, timeout=5)
        return protocol
示例#2
0
 def sync_rack(system_id, sources, proxy):
     """Ask one rack controller to import boot images.

     :param system_id: Identifier used to obtain the RPC client.
     :param sources: Passed through as the ``sources`` argument of
         ``ImportBootImages``.
     :param proxy: Used for both ``http_proxy`` and ``https_proxy``.
     :return: The object returned by ``getClientFor`` (presumably a
         Deferred), with the import call attached as a callback.
     """
     def import_boot_images(client):
         # The same proxy is deliberately used for HTTP and HTTPS.
         return client(
             ImportBootImages, sources=sources,
             http_proxy=proxy, https_proxy=proxy)

     pending = getClientFor(system_id, timeout=1)
     pending.addCallback(import_boot_images)
     return pending
示例#3
0
def get_preseed_data(preseed_type, node, token, metadata_url):
    """Fetch optional preseed data for this OS, preseed type, and node.

    The request is sent to the node's boot rack controller and this
    function blocks for up to 30 seconds waiting for the answer.

    :param preseed_type: The type of preseed to compose.
    :param node: The node model instance.
    :param token: The token model instance.
    :param metadata_url: The URL where this node's metadata will be made
        available.
    :return: The ``"data"`` entry of the RPC response.

    :raises NoConnectionsAvailable: When no connections to the node's
        cluster are available for use.
    :raises NoSuchOperatingSystem: When the node's declared operating
        system is not known to its cluster.
    :raises NotImplementedError: When this node's OS does not want to
        define any OS-specific preseed data.
    :raises TimeoutError: If a response has not been received within 30
        seconds.
    """
    rack_system_id = node.get_boot_rack_controller().system_id
    client = getClientFor(rack_system_id)
    call_arguments = dict(
        osystem=node.get_osystem(),
        preseed_type=preseed_type,
        node_system_id=node.system_id,
        node_hostname=node.hostname,
        consumer_key=token.consumer.key,
        token_key=token.key,
        token_secret=token.secret,
        metadata_url=urlparse(metadata_url),
    )
    response = client(GetPreseedData, **call_arguments).wait(30)
    return response.get("data")
示例#4
0
    def query_power_state(self, request, system_id):
        """Query the power state of a node.

        Send a request to the node's power controller which asks it about
        the node's state.  The reply to this could be delayed by up to
        30 seconds while waiting for the power controller to respond.
        Use this method sparingly as it ties up an appserver thread
        while waiting.

        :param system_id: The node to query.
        :return: a dict whose key is "state" with a value of one of
            'on' or 'off'.

        Returns 404 if the node is not found.
        Returns 503 (with explanatory text) if the power state could not
        be queried.
        """
        node = get_object_or_404(Node, system_id=system_id)
        nodegroup = node.nodegroup

        # Obtain the RPC client first; without it nothing else can happen.
        try:
            rpc_client = getClientFor(nodegroup.uuid)
        except NoConnectionsAvailable:
            maaslog.error(
                "Unable to get RPC connection for cluster '%s' (%s)",
                nodegroup.cluster_name, nodegroup.uuid)
            raise PowerProblem("Unable to connect to cluster controller")

        try:
            power_info = node.get_effective_power_info()
        except UnknownPowerType as error:
            raise PowerProblem(error)
        if not power_info.can_be_started:
            raise PowerProblem("Power state is not queryable")

        query_arguments = dict(
            system_id=system_id,
            hostname=node.hostname,
            power_type=power_info.power_type,
            context=power_info.power_parameters,
        )
        # The call is issued outside the ``try`` below so that only errors
        # raised while waiting for the answer are translated.
        pending = rpc_client(PowerQuery, **query_arguments)
        try:
            # Cap the wait at 30 seconds: an appserver thread is blocked
            # for the whole duration.
            return pending.wait(30)
        except crochet.TimeoutError:
            maaslog.error(
                "%s: Timed out waiting for power response in Node.power_state",
                node.hostname)
            raise PowerProblem("Timed out waiting for power response")
        except (NotImplementedError, PowerActionFail) as error:
            raise PowerProblem(error)
示例#5
0
def get_boot_images(rack_controller):
    """Obtain the available boot images of this rack controller.

    :param rack_controller: The RackController.
    :return: The ``"images"`` entry of the ``ListBootImages`` response.

    :raises NoConnectionsAvailable: When no connections to the rack controller
        are available for use.
    :raises crochet.TimeoutError: If a response has not been received within
        30 seconds.
    """
    client = getClientFor(rack_controller.system_id, timeout=1)
    response = client(ListBootImages).wait(30)
    return response.get("images")
示例#6
0
def validate_dhcp_config(test_dhcp_snippet=None):
    """Validate a DHCPD config with uncommitted values.

    Gathers the DHCPD config from what is committed in the database, as well as
    DHCPD config which needs to be validated, and asks a rack controller to
    validate. Testing is done with dhcpd's builtin validation flag.

    :param test_dhcp_snippet: A DHCPSnippet which has not yet been committed to
        the database and needs to be validated.
    :return: A list of the error entries reported for IPv4 and IPv6, in the
        order received, with entries that share the same ``"line"`` and
        ``"error"`` values reported only once.
    :raises ValidationError: When no suitable connected rack controller is
        available to perform the validation.
    """

    # XXX ltrager 2016-03-28 - This only tests the existing config with new
    # DHCPSnippets but could be expanded to test changes to the config(e.g
    # subnets, omapi_key, interfaces, etc) before they are committed.

    # Every "no rack available" path reports the same message.
    no_rack_message = (
        "Unable to validate DHCP config, "
        "no available rack controller connected.")

    def find_connected_rack(racks):
        # A set gives constant-time membership tests for each candidate rack.
        connected_racks = {client.ident for client in getAllClients()}
        for rack in racks:
            if rack.system_id in connected_racks:
                return rack
        # The dhcpd.conf config rendered on a rack controller only contains
        # subnets and interfaces which can connect to that rack controller.
        # If no rack controller was found picking a random rack controller
        # which is connected will result in testing a config which does
        # not contain the values we are trying to test.
        raise ValidationError(no_rack_message)

    rack_controller = None
    # Test on the rack controller where the DHCPSnippet will be used
    if test_dhcp_snippet is not None:
        if test_dhcp_snippet.subnet is not None:
            rack_controller = find_connected_rack(
                RackController.objects.filter_by_subnets(
                    [test_dhcp_snippet.subnet]))
        elif test_dhcp_snippet.node is not None:
            rack_controller = find_connected_rack(
                test_dhcp_snippet.node.get_boot_rack_controllers())

    try:
        if rack_controller is None:
            # If no rack controller is linked to the DHCPSnippet it's a
            # global DHCP snippet which we can test anywhere.
            client = getRandomClient()
        else:
            client = getClientFor(rack_controller.system_id)
    except NoConnectionsAvailable:
        raise ValidationError(no_rack_message)
    # Always resolve the rack controller from the client actually obtained.
    rack_controller = RackController.objects.get(system_id=client.ident)

    # Get configuration for both IPv4 and IPv6.
    config = get_dhcp_configuration(rack_controller, test_dhcp_snippet)

    # Fix interfaces to go over the wire.
    interfaces_v4 = [{"name": name} for name in config.interfaces_v4]
    interfaces_v6 = [{"name": name} for name in config.interfaces_v6]

    # Validate both IPv4 and IPv6.
    v4_args = dict(
        omapi_key=config.omapi_key,
        failover_peers=config.failover_peers_v4,
        hosts=config.hosts_v4,
        interfaces=interfaces_v4,
        global_dhcp_snippets=config.global_dhcp_snippets,
        shared_networks=config.shared_networks_v4,
    )
    v6_args = dict(
        omapi_key=config.omapi_key,
        failover_peers=config.failover_peers_v6,
        hosts=config.hosts_v6,
        interfaces=interfaces_v6,
        global_dhcp_snippets=config.global_dhcp_snippets,
        shared_networks=config.shared_networks_v6,
    )

    # XXX: These remote calls can hold transactions open for a prolonged
    # period. This is bad for concurrency and scaling.
    v4_response = _validate_dhcp_config_v4(client, **v4_args).wait(30)
    v6_response = _validate_dhcp_config_v6(client, **v6_args).wait(30)

    # Deduplicate errors between IPv4 and IPv6. The key stays a formatted
    # string so that equality between entries is judged exactly as before.
    seen_keys = set()
    unique_errors = []
    for errors in (v4_response["errors"], v6_response["errors"]):
        if errors is None:
            continue
        for error in errors:
            error_key = "%s - %s" % (error["line"], error["error"])
            if error_key not in seen_keys:
                seen_keys.add(error_key)
                unique_errors.append(error)
    return unique_errors
示例#7
0
def configure_dhcp(rack_controller):
    """Write the DHCP configuration files and restart the DHCP servers.

    This is a generator that yields the result of each asynchronous step;
    it is presumably driven by a decorator such as ``inlineCallbacks`` that
    is not visible here (TODO confirm). DHCPv4 and DHCPv6 are configured
    independently, the status of both services is recorded, and only then
    is any captured error re-raised.

    :param rack_controller: The rack controller to configure.
    :raises: :py:class:`~.exceptions.NoConnectionsAvailable` when there
        are no open connections to the specified cluster controller.
    """
    # DHCP_CONNECT is set, by default, to False in all tests and True in
    # non-tests, which avoids unnecessary calls to async tasks.
    if not settings.DHCP_CONNECT:
        return

    # Obtaining the client is cheap and may raise, so do it before the
    # more expensive configuration lookup.
    client = yield getClientFor(rack_controller.system_id)

    # One configuration object covers both IPv4 and IPv6.
    config = yield deferToDatabase(get_dhcp_configuration, rack_controller)

    # Interfaces travel over the wire as dicts with a "name" key.
    interfaces_v4 = [{"name": name} for name in config.interfaces_v4]
    interfaces_v6 = [{"name": name} for name in config.interfaces_v6]

    v4_error = v6_error = None
    v4_status = v6_status = SERVICE_STATUS.UNKNOWN

    # --- DHCPv4 ---
    try:
        yield _perform_dhcp_config(
            client,
            ConfigureDHCPv4_V2,
            ConfigureDHCPv4,
            failover_peers=config.failover_peers_v4,
            interfaces=interfaces_v4,
            shared_networks=config.shared_networks_v4,
            hosts=config.hosts_v4,
            global_dhcp_snippets=config.global_dhcp_snippets,
            omapi_key=config.omapi_key,
        )
    except Exception as exc:
        # Keep the error so IPv6 is still attempted; it is raised at the end.
        v4_error = exc
        v4_status = SERVICE_STATUS.DEAD
        log.err(
            None,
            "Error configuring DHCPv4 on rack controller '%s (%s)': %s" %
            (rack_controller.hostname, rack_controller.system_id, exc),
        )
    else:
        v4_status = (
            SERVICE_STATUS.RUNNING if len(config.shared_networks_v4) > 0
            else SERVICE_STATUS.OFF)
        log.msg(
            "Successfully configured DHCPv4 on rack controller '%s (%s)'." %
            (rack_controller.hostname, rack_controller.system_id))

    # --- DHCPv6 ---
    try:
        yield _perform_dhcp_config(
            client,
            ConfigureDHCPv6_V2,
            ConfigureDHCPv6,
            failover_peers=config.failover_peers_v6,
            interfaces=interfaces_v6,
            shared_networks=config.shared_networks_v6,
            hosts=config.hosts_v6,
            global_dhcp_snippets=config.global_dhcp_snippets,
            omapi_key=config.omapi_key,
        )
    except Exception as exc:
        v6_error = exc
        v6_status = SERVICE_STATUS.DEAD
        log.err(
            None,
            "Error configuring DHCPv6 on rack controller '%s (%s)': %s" %
            (rack_controller.hostname, rack_controller.system_id, exc),
        )
    else:
        v6_status = (
            SERVICE_STATUS.RUNNING if len(config.shared_networks_v6) > 0
            else SERVICE_STATUS.OFF)
        log.msg(
            "Successfully configured DHCPv6 on rack controller '%s (%s)'." %
            (rack_controller.hostname, rack_controller.system_id))

    # Record the status of both services, whatever the outcome, so the
    # user always sees the most up to date status.
    @transactional
    def update_services():
        v4_status_info = "" if v4_error is None else str(v4_error)
        v6_status_info = "" if v6_error is None else str(v6_error)
        Service.objects.update_service_for(
            rack_controller, "dhcpd", v4_status, v4_status_info)
        Service.objects.update_service_for(
            rack_controller, "dhcpd6", v6_status, v6_status_info)

    yield deferToDatabase(update_services)

    # Surface failures to the caller, which might want to retry. IPv4 wins
    # when both failed; there is no specific reason for that order, since a
    # retry performs both configurations again.
    if v4_error:
        raise v4_error
    if v6_error:
        raise v6_error
示例#8
0
def call_clusters(
        command, *, kwargs=None, timeout=10, controllers=None,
        ignore_errors=True, available_callback=_none,
        unavailable_callback=_none, success_callback=_none,
        failed_callback=_none, failure_callback=_none, timeout_callback=_none):
    """Make an RPC call to all rack controllers in parallel.

    Includes optional callbacks to report the status of the call for each
    controller. If the call was a success, the `success_callback` will be
    called immediately before the response is yielded (so that the caller
    can determine which controller was contacted successfully).

    All optional callbacks are called with a single argument: the
    `RackController` model object that corresponds to the RPC call.

    :param controllers: The :class:`RackController`s on which to make the RPC
        call. If None, defaults to all :class:`RackController`s.
    :param timeout: The maximum number of seconds to wait for responses from
        all controllers.
    :param command: An :class:`amp.Command` to call on the clusters.
    :param ignore_errors: If True, errors encountered whilst calling
        `command` on the clusters won't raise an exception.
    :param available_callback: Optional callback; called with the controller
        when an RPC connection to the controller was established.
    :param unavailable_callback: Optional callback; called with the controller
        when an RPC connection to the controller failed to be established.
    :param success_callback: Optional callback; called with the controller
        when the RPC call was a success and this method is about to yield the
        result.
    :param failed_callback: Optional callback; called with the controller if
        the RPC call fails.
    :param failure_callback: Optional callback; called with the `Failure`
        object if the RPC call fail with a well-known exception.
    :param timeout_callback: Optional callback; called if the RPC call
        fails with a timeout.
    :param kwargs: Optional keyword arguments to pass to the command
    :return: A generator of results, i.e. the dicts returned by the RPC
        call.
    :raises: :py:class:`ClusterUnavailable` when a cluster is not
        connected or there's an error during the call, and errors are
        not being ignored.
    """
    # Get the name of the RPC function for logging purposes. Each RPC function
    # is enacapsulated in a `class`, so should have a corresponding `__name__`.
    # However, we don't want to crash if that isn't the case.
    if kwargs is None:
        kwargs = {}
    command_name = (
        command.commandName.decode("ascii") if hasattr(command, 'commandName')
        else "<unknown>")
    calls = {}
    if controllers is None:
        controllers = RackController.objects.all()
    for controller in controllers:
        try:
            client = getClientFor(controller.system_id)
        except NoConnectionsAvailable:
            logger.error(
                "Error while calling %s: Unable to get RPC connection for "
                "rack controller '%s' (%s).", command_name,
                controller.hostname, controller.system_id)
            unavailable_callback(controller)
            if not ignore_errors:
                raise ClusterUnavailable(
                    "Unable to get RPC connection for rack controller "
                    "'%s' (%s)" % (
                        controller.hostname, controller.system_id))
        else:
            # The call to partial() requires a `callable`, but `getClientFor()`
            # might return a `Deferred` if it runs in the reactor.
            assert callable(client), (
                "call_clusters() must not be called in the reactor thread. "
                "You probably want to use deferToDatabase().")
            available_callback(controller)
            call = partial(client, command, **kwargs)
            calls[call] = controller

    for call, response in async.gatherCallResults(calls, timeout=timeout):
        # When a call returns results, figure out which controller it came from
        # and remove it from the list, so we can report which controllers
        # timed out.
        controller = calls[call]
        del calls[call]
        if isinstance(response, Failure):
            # Create a nice message for logging purposes. We can rely on
            # the 'type' ivar being filled in with the Exception type in a
            # Failure object, so use that to get a nice version of the name.
            exception_class = response.type.__name__
            error = str(response.value).strip()
            if len(error) > 0:
                error = ": " + error
            human_readable_error = (
                "Exception during %s() on rack controller '%s' (%s): %s%s" % (
                    command_name, controller.hostname, controller.system_id,
                    exception_class, error))
            logger.warning(human_readable_error)
            # For failures, there are two callbacks: one for the controller
            # that failed, the second for the specific failure that occurred.
            failed_callback(controller)
            failure_callback(response)
            if not ignore_errors:
                raise ClusterUnavailable(human_readable_error)
        else:
            success_callback(controller)
            yield response
示例#9
0
 def test_getClientFor_service_not_running(self):
     # Asking for a client must fail with NoConnectionsAvailable; per the
     # test name, this is the case where the RPC service is not running.
     client_request = rpc.getClientFor(sentinel.uuid)
     return assert_fails_with(
         client_request, exceptions.NoConnectionsAvailable)