Example 1
def convergence_remove_server_from_group(log, transaction_id, server_id,
                                         replace, purge, group, state):
    """
    Remove a specific server from the group, optionally decrementing the
    desired capacity.

    The server may just be scheduled for deletion, or it may be evicted from
    the group by removing otter-specific metadata from the server.

    :param log: A bound logger
    :param bytes transaction_id: The transaction id for this operation.
    :param bytes server_id: The id of the server to be removed.
    :param bool replace: Should the server be replaced?
    :param bool purge: Should the server be deleted from Nova?
    :param group: The scaling group to remove a server from.
    :type group: :class:`~otter.models.interface.IScalingGroup`
    :param state: The current state of the group.
    :type state: :class:`~otter.models.interface.GroupState`

    :return: The updated state.
    :rtype: Effect of :class:`~otter.models.interface.GroupState`

    :raise: :class:`CannotDeleteServerBelowMinError` if the server cannot
        be deleted without replacement, and :class:`ServerNotFoundError` if
        there is no such server to be deleted.
    """
    effects = [_is_server_in_group(group, server_id)]
    if not replace:
        effects.append(_can_scale_down(group, server_id))

    # the (possibly) two checks can happen in parallel, but we want
    # ServerNotFoundError to take precedence over
    # CannotDeleteServerBelowMinError
    both_checks = yield parallel_all_errors(effects)
    for is_error, result in both_checks:
        if is_error:
            reraise(*result)

    # Remove the server
    if purge:
        eff = set_nova_metadata_item(server_id, *DRAINING_METADATA)
    else:
        eff = Effect(
            EvictServerFromScalingGroup(log=log,
                                        transaction_id=transaction_id,
                                        scaling_group=group,
                                        server_id=server_id))
    yield Effect(
        TenantScope(
            retry_effect(eff, retry_times(3), exponential_backoff_interval(2)),
            group.tenant_id))

    if not replace:
        yield do_return(assoc_obj(state, desired=state.desired - 1))
    else:
        yield do_return(state)
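A minimal sketch, not from this codebase, of the precedence trick noted in the comment above: parallel_all_errors returns (is_error, result) pairs in the order the effects were given, so re-raising the first error encountered gives the first check priority. The Error intents here are stand-ins for the real checks.

from effect import Effect, Error, parallel_all_errors
from effect.do import do
from six import reraise


@do
def _checks_in_order():
    # Both effects run concurrently, but results come back in input order.
    results = yield parallel_all_errors(
        [Effect(Error(ValueError("not in group"))),
         Effect(Error(RuntimeError("below min")))])
    for is_error, result in results:
        if is_error:
            # result is an exc_info tuple; the first listed check wins,
            # so this raises ValueError, never RuntimeError.
            reraise(*result)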
Example 2
def convergence_remove_server_from_group(
        log, transaction_id, server_id, replace, purge, group, state):
    """
    Remove a specific server from the group, optionally decrementing the
    desired capacity.

    The server may just be scheduled for deletion, or it may be evicted from
    the group by removing otter-specific metadata from the server.

    :param log: A bound logger
    :param bytes transaction_id: The transaction id for this operation.
    :param bytes server_id: The id of the server to be removed.
    :param bool replace: Should the server be replaced?
    :param bool purge: Should the server be deleted from Nova?
    :param group: The scaling group to remove a server from.
    :type group: :class:`~otter.models.interface.IScalingGroup`
    :param state: The current state of the group.
    :type state: :class:`~otter.models.interface.GroupState`

    :return: The updated state.
    :rtype: Effect of :class:`~otter.models.interface.GroupState`

    :raise: :class:`CannotDeleteServerBelowMinError` if the server cannot
        be deleted without replacement, and :class:`ServerNotFoundError` if
        there is no such server to be deleted.
    """
    effects = [_is_server_in_group(group, server_id)]
    if not replace:
        effects.append(_can_scale_down(group, server_id))

    # the (possibly) two checks can happen in parallel, but we want
    # ServerNotFoundError to take precedence over
    # CannotDeleteServerBelowMinError
    both_checks = yield parallel_all_errors(effects)
    for is_error, result in both_checks:
        if is_error:
            reraise(*result)

    # Remove the server
    if purge:
        eff = set_nova_metadata_item(server_id, *DRAINING_METADATA)
    else:
        eff = Effect(
            EvictServerFromScalingGroup(log=log,
                                        transaction_id=transaction_id,
                                        scaling_group=group,
                                        server_id=server_id))
    yield Effect(TenantScope(
        retry_effect(eff, retry_times(3), exponential_backoff_interval(2)),
        group.tenant_id))

    if not replace:
        yield do_return(assoc_obj(state, desired=state.desired - 1))
    else:
        yield do_return(state)
Example 3
 def _acquire_loop(self, blocking, timeout):
     acquired = yield self.is_acquired_eff()
     if acquired or not blocking:
         yield do_return(acquired)
     start = yield Effect(Func(time.time))
     while True:
         yield Effect(Delay(self._interval))
         if (yield self.is_acquired_eff()):
             yield do_return(True)
         if timeout is not None:
             now = yield Effect(Func(time.time))
             if now - start > timeout:
                 raise LockTimeout("Failed to acquire lock on {} in {} seconds".format(self.path, now - start))
Example 4
def step(state):
    yield display(render(state))
    try:
        user_input = yield Effect(Prompt("> "))
        cmd, arg = parse(user_input)
        result = dispatch(state, cmd, arg)
        yield display("Okay.")
        yield do_return(result)
    except (EOFError, KeyboardInterrupt):
        yield display("\nThanks for playing!")
        sys.exit(0)
    except Exception as e:
        yield display(str(e))
        yield do_return(state)
Example 5
 def _acquire_loop(self, blocking, timeout):
     acquired = yield self.is_acquired_eff()
     if acquired or not blocking:
         yield do_return(acquired)
     start = yield Effect(Func(time.time))
     while True:
         yield Effect(Delay(self._interval))
         if (yield self.is_acquired_eff()):
             yield do_return(True)
         if timeout is not None:
             now = yield Effect(Func(time.time))
             if now - start > timeout:
                 raise LockTimeout(
                     "Failed to acquire lock on {} in {} seconds".format(
                         self.path, now - start))
Example 6
    def is_acquired_eff(self):
        """
        Effect implementation of ``is_acquired``.

        :return: ``Effect`` of ``bool``
        """
        if self._node is None:
            yield do_return(False)
        children = yield Effect(GetChildren(self.path))
        if not children:
            yield do_return(False)
        # The last 10 characters are the sequence number, as per
        # https://zookeeper.apache.org/doc/current/zookeeperProgrammers.html#Sequence+Nodes+--+Unique+Naming
        basename = self._node.rsplit("/")[-1]
        yield do_return(sorted(children, key=lambda c: c[-10:])[0] == basename)
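A tiny worked example, with made-up znode names, of the sort on the last line above: ZooKeeper sequence nodes end in a zero-padded ten-digit counter, so ordering children by their last 10 characters orders them by creation time, and the lock belongs to the oldest child.

# Made-up children of a lock path; the suffix is the sequence number.
children = ["a3f2-lock0000000002", "9c1b-lock0000000000", "77de-lock0000000001"]
holder = sorted(children, key=lambda c: c[-10:])[0]
assert holder == "9c1b-lock0000000000"  # lowest sequence number holds the lock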
Example 7
def _execute_steps(steps):
    """
    Given a set of steps, executes them, logs the result, and returns the worst
    priority with a list of reasons for that result.

    :return: a tuple of (:class:`StepResult` constant, list of reasons)
    """
    if len(steps) > 0:
        results = yield steps_to_effect(steps)

        severity = [
            StepResult.FAILURE, StepResult.RETRY, StepResult.LIMITED_RETRY,
            StepResult.SUCCESS
        ]
        priority = sorted(results,
                          key=lambda (status, reasons): severity.index(status))
        worst_status = priority[0][0]
        results_to_log = [{
            'step': step,
            'result': result,
            'reasons': map(structure_reason, reasons)
        } for step, (result, reasons) in zip(steps, results)]
        reasons = reduce(operator.add,
                         (x[1] for x in results if x[0] == worst_status))
    else:
        worst_status = StepResult.SUCCESS
        results_to_log = reasons = []

    yield msg('execute-convergence-results',
              results=results_to_log,
              worst_status=worst_status.name)
    yield do_return((worst_status, reasons))
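A short worked sketch of how the worst status is chosen above: each (status, reasons) pair is ranked by the status's index in the severity list, so after sorting, the head of the list is the most severe outcome. Plain strings stand in for the StepResult members here.

# String stand-ins for StepResult members, ranked worst-first.
severity = ["FAILURE", "RETRY", "LIMITED_RETRY", "SUCCESS"]
results = [("SUCCESS", []), ("RETRY", ["still building"]), ("SUCCESS", [])]
worst_status = sorted(results, key=lambda r: severity.index(r[0]))[0][0]
assert worst_status == "RETRY"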
Example 8
def handle_take(request):
    item_name = request.form['item_name']
    state = yield Effect(LoadGame())
    st = take(state, item_name)
    if st is not None:
        yield Effect(SaveGame(state=st))
    yield do_return(redirect(url_for('root')))
Example 9
def handle_move(request):
    exit_name = request.form['exit_name']
    state = yield Effect(LoadGame())
    st = move(state, exit_name)
    if st is not None:
        yield Effect(SaveGame(state=st))
    yield do_return(redirect(url_for('root')))
Example 10
def convergence_exec_data(tenant_id, group_id, now, get_executor):
    """
    Get data required while executing convergence
    """
    sg_eff = Effect(GetScalingGroupInfo(tenant_id=tenant_id,
                                        group_id=group_id))

    (scaling_group, manifest) = yield sg_eff

    group_state = manifest['state']
    launch_config = manifest['launchConfiguration']

    executor = get_executor(launch_config)

    resources = yield executor.gather(tenant_id, group_id, now)

    if group_state.status == ScalingGroupStatus.DELETING:
        desired_capacity = 0
    else:
        desired_capacity = group_state.desired
        yield executor.update_cache(scaling_group, now, **resources)

    desired_group_state = executor.get_desired_group_state(
        group_id, launch_config, desired_capacity)

    yield do_return(
        (executor, scaling_group, group_state, desired_group_state, resources))
Example 11
def get_scaling_group_servers(tenant_id,
                              group_id,
                              now,
                              all_as_servers=get_all_scaling_group_servers,
                              all_servers=get_all_server_details,
                              cache_class=CassScalingGroupServersCache):
    """
    Get a group's servers, taken from the cache if one exists. Updates the
    cache with newly fetched servers if it is empty.
    # NOTE: This function takes tenant_id even though the whole effect is
    # scoped on the tenant because cache calls require tenant_id. Should
    # they also not take tenant_id and work on the scope?

    :return: Servers as list of dicts
    :rtype: Effect
    """
    cache = cache_class(tenant_id, group_id)
    cached_servers, last_update = yield cache.get_servers(False)
    if last_update is None:
        servers = (yield all_as_servers()).get(group_id, [])
    else:
        current = yield all_servers()
        servers = mark_deleted_servers(cached_servers, current)
        servers = list(filter(server_of_group(group_id), servers))
    yield do_return(servers)
Example 12
    def acquire_eff(self, blocking, timeout):
        """
        Effect implementation of ``acquire`` method.

        :return: ``Effect`` of ``bool``
        """
        try:
            # Before acquiring, let's delete any child node that may be
            # lingering from a previous acquire. Ideally this matters only
            # when acquire is called again before release; it shouldn't
            # happen if this is called after release or after is_acquired
            # returns False. In any case, it's the safest thing to do.
            yield self.release_eff()
            try:
                yield Effect(CreateNode(self.path))
            except NodeExistsError:
                pass
            prefix = yield Effect(Func(uuid.uuid4))
            # TODO: https://github.com/rackerlabs/otter/issues/1926
            create_intent = CreateNode(
                "{}/{}".format(self.path, prefix), value=self.identifier, ephemeral=True, sequence=True
            )
            self._node = yield Effect(create_intent)
            acquired = yield self._acquire_loop(blocking, timeout)
            if not acquired:
                yield self.release_eff()
            yield do_return(acquired)
        except Exception as e:
            yield self.release_eff()
            raise e
Example 13
def convergence_exec_data(tenant_id, group_id, now, get_executor):
    """
    Get data required while executing convergence
    """
    sg_eff = Effect(GetScalingGroupInfo(tenant_id=tenant_id,
                                        group_id=group_id))

    (scaling_group, manifest) = yield sg_eff

    group_state = manifest['state']
    launch_config = manifest['launchConfiguration']

    executor = get_executor(launch_config)

    resources = yield executor.gather(tenant_id, group_id, now)

    if group_state.status == ScalingGroupStatus.DELETING:
        desired_capacity = 0
    else:
        desired_capacity = group_state.desired
        # See [Convergence servers cache] comment on top of the file.
        yield executor.update_cache(scaling_group, now, **resources)

    desired_group_state = executor.get_desired_group_state(
        group_id, launch_config, desired_capacity)

    yield do_return((executor, scaling_group, group_state, desired_group_state,
                     resources))
Example 14
def convergence_failed(tenant_id, group_id, reasons, timedout=False):
    """
    Handle convergence failure

    :param str tenant_id: Tenant ID
    :param str group_id: Group ID
    :param reasons: List of :obj:`ErrorReason` objects
    :param bool timedout: Did convergence fail due to timing out?

    :return: convergence execution status
    :rtype: :obj:`ConvergenceIterationStatus`
    """
    yield Effect(LoadAndUpdateGroupStatus(tenant_id, group_id,
                                          ScalingGroupStatus.ERROR))
    presented_reasons = sorted(present_reasons(reasons))
    if len(presented_reasons) == 0:
        presented_reasons = [u"Unknown error occurred"]
    elif timedout:
        presented_reasons = ["Timed out: {}".format(reason)
                             for reason in presented_reasons]
    yield cf_err(
        'group-status-error', status=ScalingGroupStatus.ERROR.name,
        reasons=presented_reasons)
    yield Effect(UpdateGroupErrorReasons(tenant_id, group_id,
                                         presented_reasons))
    yield do_return(ConvergenceIterationStatus.Stop())
Example 15
def _execute_steps(steps):
    """
    Given a set of steps, executes them, logs the result, and returns the worst
    priority with a list of reasons for that result.

    :return: a tuple of (:class:`StepResult` constant,
                         list of :obj:`ErrorReason`)
    """
    if len(steps) > 0:
        results = yield steps_to_effect(steps)

        severity = [StepResult.FAILURE, StepResult.RETRY,
                    StepResult.LIMITED_RETRY, StepResult.SUCCESS]
        priority = sorted(results,
                          key=lambda (status, reasons): severity.index(status))
        worst_status = priority[0][0]
        results_to_log = [
            {'step': step,
             'result': result,
             'reasons': map(structure_reason, reasons)}
            for step, (result, reasons) in
            zip(steps, results)
        ]
        reasons = reduce(operator.add,
                         (x[1] for x in results if x[0] == worst_status))
    else:
        worst_status = StepResult.SUCCESS
        results_to_log = reasons = []

    yield msg('execute-convergence-results',
              results=results_to_log,
              worst_status=worst_status.name)
    yield do_return((worst_status, reasons))
Example 16
    def acquire_eff(self, blocking, timeout):
        """
        Effect implementation of ``acquire`` method.

        :return: ``Effect`` of ``bool``
        """
        try:
            # Before acquiring, let's delete any child node that may be
            # lingering from a previous acquire. Ideally this matters only
            # when acquire is called again before release; it shouldn't
            # happen if this is called after release or after is_acquired
            # returns False. In any case, it's the safest thing to do.
            yield self.release_eff()
            try:
                yield Effect(CreateNode(self.path))
            except NodeExistsError:
                pass
            prefix = yield Effect(Func(uuid.uuid4))
            # TODO: https://github.com/rackerlabs/otter/issues/1926
            create_intent = CreateNode("{}/{}".format(self.path, prefix),
                                       value=self.identifier,
                                       ephemeral=True,
                                       sequence=True)
            self._node = yield Effect(create_intent)
            acquired = yield self._acquire_loop(blocking, timeout)
            if not acquired:
                yield self.release_eff()
            yield do_return(acquired)
        except Exception as e:
            yield self.release_eff()
            raise e
Example 17
def get_clb_contents():
    """
    Get Rackspace Cloud Load Balancer contents as list of `CLBNode`. CLB
    health monitor information is also returned as a pmap of :obj:`CLB` objects
    mapped on LB ID.

    :return: Effect of (``list`` of :obj:`CLBNode`, `pmap` of :obj:`CLB`)
    :rtype: :obj:`Effect`
    """
    # If we get a CLBNotFoundError while fetching feeds, we should throw away
    # all nodes related to that load balancer, because we don't want to act on
    # data that we know is invalid/outdated (for example, if we can't fetch a
    # feed because CLB was deleted, we don't want to say that we have a node in
    # DRAINING with draining time of 0; we should just say that the node is
    # gone).

    def gone(r):
        return catch(CLBNotFoundError, lambda exc: r)

    lb_ids = [lb['id'] for lb in (yield _retry(get_clbs()))]
    node_reqs = [_retry(get_clb_nodes(lb_id).on(error=gone([])))
                 for lb_id in lb_ids]
    healthmon_reqs = [
        _retry(get_clb_health_monitor(lb_id).on(error=gone(None)))
        for lb_id in lb_ids]
    all_nodes_hms = yield parallel(node_reqs + healthmon_reqs)
    all_nodes, hms = all_nodes_hms[:len(lb_ids)], all_nodes_hms[len(lb_ids):]
    lb_nodes = {
        lb_id: [CLBNode.from_node_json(lb_id, node)
                for node in nodes]
        for lb_id, nodes in zip(lb_ids, all_nodes)}
    clbs = {
        str(lb_id): CLB(bool(health_mon))
        for lb_id, health_mon in zip(lb_ids, hms) if health_mon is not None}
    draining = [n for n in concat(lb_nodes.values())
                if n.description.condition == CLBNodeCondition.DRAINING]
    feeds = yield parallel(
        [_retry(get_clb_node_feed(n.description.lb_id, n.node_id).on(
            error=gone(None)))
         for n in draining]
    )
    nodes_to_feeds = dict(zip(draining, feeds))
    deleted_lbs = set([
        node.description.lb_id
        for (node, feed) in nodes_to_feeds.items() if feed is None])

    def update_drained_at(node):
        feed = nodes_to_feeds.get(node)
        if node.description.lb_id in deleted_lbs:
            return None
        if feed is not None:
            node.drained_at = extract_clb_drained_at(feed)
        return node

    nodes = map(update_drained_at, concat(lb_nodes.values()))
    yield do_return((
        list(filter(bool, nodes)),
        pmap(keyfilter(lambda k: k not in deleted_lbs, clbs))))
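A small sketch, with dummy data, of the result-splitting step above: the node requests and health-monitor requests go through a single parallel() call, so the first len(lb_ids) results are node lists and the remainder are health monitors, in the same load balancer order.

# Dummy results, shaped as one parallel() call would return them.
lb_ids = ["lb1", "lb2"]
all_nodes_hms = [["node-a"], ["node-b"], {"type": "CONNECT"}, None]
all_nodes = all_nodes_hms[:len(lb_ids)]  # node lists for lb1, lb2
hms = all_nodes_hms[len(lb_ids):]        # health monitors; lb2 has none
assert list(zip(lb_ids, hms)) == [("lb1", {"type": "CONNECT"}), ("lb2", None)]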
Example 18
def convergence_succeeded(executor, scaling_group, group_state, resources,
                          now):
    """
    Handle convergence success
    """
    if group_state.status == ScalingGroupStatus.DELETING:
        # servers have been deleted. Delete the group for real
        yield Effect(DeleteGroup(tenant_id=scaling_group.tenant_id,
                                 group_id=scaling_group.uuid))
        yield do_return(ConvergenceIterationStatus.GroupDeleted())
    elif group_state.status == ScalingGroupStatus.ERROR:
        yield Effect(UpdateGroupStatus(scaling_group=scaling_group,
                                       status=ScalingGroupStatus.ACTIVE))
        yield cf_msg('group-status-active',
                     status=ScalingGroupStatus.ACTIVE.name)
    # update servers cache with latest servers
    yield executor.update_cache(scaling_group, now, include_deleted=False,
                                **resources)
    yield do_return(ConvergenceIterationStatus.Stop())
Example 19
def convergence_succeeded(executor, scaling_group, group_state, resources):
    """
    Handle convergence success
    """
    if group_state.status == ScalingGroupStatus.DELETING:
        # servers have been deleted. Delete the group for real
        yield Effect(DeleteGroup(tenant_id=scaling_group.tenant_id,
                                 group_id=scaling_group.uuid))
        yield do_return(ConvergenceIterationStatus.GroupDeleted())
    elif group_state.status == ScalingGroupStatus.ERROR:
        yield Effect(UpdateGroupStatus(scaling_group=scaling_group,
                                       status=ScalingGroupStatus.ACTIVE))
        yield cf_msg('group-status-active',
                     status=ScalingGroupStatus.ACTIVE.name)
    # update servers cache with latest servers.
    # See [Convergence servers cache] comment on top of the file.
    now = yield Effect(Func(datetime.utcnow))
    yield executor.update_cache(scaling_group, now, include_deleted=False,
                                **resources)
    yield do_return(ConvergenceIterationStatus.Stop())
Example 20
def group_steps(group):
    """
    Return Effect of list of steps that would be performed on the group
    if convergence is triggered on it with desired=actual. Also returns
    current delta of desired and actual
    """
    now_dt = yield Effect(Func(datetime.utcnow))
    all_data_eff = convergence_exec_data(
        group["tenantId"], group["groupId"], now_dt, get_executor)
    try:
        all_data = yield Effect(TenantScope(all_data_eff, group["tenantId"]))
    except Exception as e:
        yield do_return((e, 0))
    (executor, scaling_group, group_state, desired_group_state,
     resources) = all_data
    delta = desired_group_state.capacity - len(resources['servers'])
    desired_group_state.capacity = len(resources['servers'])
    steps = executor.plan(desired_group_state, datetime_to_epoch(now_dt),
                          3600, {}, **resources)
    yield do_return((steps, delta))
Example 21
def perform_get_children_with_stats(kz_client, dispatcher, intent):
    """
    Perform :obj:`GetChildrenWithStats`. Must be partialed with ``kz_client``.

    :param kz_client: txKazoo client
    :param dispatcher: dispatcher, supplied by perform
    :param GetChildrenWithStats intent: the intent
    """
    path = intent.path
    children = yield Effect(GetChildren(path))
    stats = yield parallel(Effect(GetStat(path + "/" + p)) for p in children)
    yield do_return([c_and_s for c_and_s in zip(children, stats) if c_and_s[1] is not None])
Example 22
def get_recently_converged_groups(recently_converged, interval):
    """
    Return a list of recently converged groups, and garbage-collect any groups
    in the recently_converged map that are no longer 'recent'.
    """
    # STM would be cool but this is synchronous so whatever
    recent = yield recently_converged.read()
    now = yield Effect(Func(time.time))
    to_remove = [group for group in recent if now - recent[group] > interval]
    cleaned = reduce(lambda m, g: m.remove(g), to_remove, recent)
    if recent != cleaned:
        yield recently_converged.modify(lambda _: cleaned)
    yield do_return(cleaned.keys())
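A worked sketch, with made-up timestamps, of the garbage collection above: entries older than the interval are dropped from the persistent map one at a time with reduce, and the cleaned map is only written back when something actually changed.

from functools import reduce

from pyrsistent import pmap

now, interval = 100.0, 30
recent = pmap({"group-a": 95.0, "group-b": 50.0})
to_remove = [g for g in recent if now - recent[g] > interval]
cleaned = reduce(lambda m, g: m.remove(g), to_remove, recent)
assert dict(cleaned) == {"group-a": 95.0}  # group-b aged out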
Example 23
def get_todays_scaling_groups(convergence_tids, fname):
    """
    Get scaling groups from tenants that are enabled up to today
    """
    groups = yield Effect(GetAllGroups())
    non_conv_tenants = set(groups.keys()) - set(convergence_tids)
    last_tenants_len, last_date = yield get_last_info(fname)
    now = yield Effect(Func(datetime.utcnow))
    tenants, last_tenants_len, last_date = get_todays_tenants(
        non_conv_tenants, now, last_tenants_len, last_date)
    yield update_last_info(fname, last_tenants_len, last_date)
    yield do_return(
        keyfilter(lambda t: t in set(tenants + convergence_tids), groups))
Example 24
def perform_get_children_with_stats(kz_client, dispatcher, intent):
    """
    Perform :obj:`GetChildrenWithStats`. Must be partialed with ``kz_client``.

    :param kz_client: txKazoo client
    :param dispatcher: dispatcher, supplied by perform
    :param GetChildrenWithStats intent: the intent
    """
    path = intent.path
    children = yield Effect(GetChildren(path))
    stats = yield parallel(Effect(GetStat(path + '/' + p)) for p in children)
    yield do_return([
        c_and_s for c_and_s in zip(children, stats) if c_and_s[1] is not None
    ])
Example 25
def read_entries(service_type,
                 url,
                 params,
                 direction,
                 follow_limit=100,
                 log_msg_type=None):
    """
    Read all feed entries and follow in the given direction until the
    feed is empty

    :param service_type: Service hosting the feed
    :type service_type: A member of :class:`ServiceType`
    :param str url: CF URL to append
    :param dict params: HTTP parameters
    :param direction: Where to continue fetching?
    :type direction: A member of :class:`Direction`
    :param int follow_limit: Maximum number of times to follow in given
        direction

    :return: (``list`` of :obj:`Element`, last fetched params) tuple
    """
    if direction == Direction.PREVIOUS:
        direction_link = atom.previous_link
    elif direction == Direction.NEXT:
        direction_link = atom.next_link
    else:
        raise ValueError("Invalid direction")

    if log_msg_type is not None:
        log_cb = log_success_response(log_msg_type, identity, False)
    else:
        log_cb = identity

    all_entries = []
    while follow_limit > 0:
        resp, feed_str = yield service_request(service_type,
                                               "GET",
                                               url,
                                               params=params,
                                               json_response=False).on(log_cb)
        feed = atom.parse(feed_str)
        entries = atom.entries(feed)
        if entries == []:
            break
        all_entries.extend(entries)
        link = direction_link(feed)
        if link is None:
            break
        params = parse_qs(urlparse(link).query)
        follow_limit -= 1
    yield do_return((all_entries, params))
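A small sketch, with an assumed feed URL, of the pagination step above: the next request's query parameters are lifted straight from the feed's direction link using urlparse and parse_qs.

from urllib.parse import parse_qs, urlparse  # urlparse module on Python 2

link = "https://feeds.example.com/events?marker=last-entry&limit=100"
params = parse_qs(urlparse(link).query)
assert params == {"marker": ["last-entry"], "limit": ["100"]}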
Example 26
def convergence_failed(scaling_group, reasons):
    """
    Handle convergence failure
    """
    yield Effect(UpdateGroupStatus(scaling_group=scaling_group,
                                   status=ScalingGroupStatus.ERROR))
    presented_reasons = sorted(present_reasons(reasons))
    if len(presented_reasons) == 0:
        presented_reasons = [u"Unknown error occurred"]
    yield cf_err(
        'group-status-error', status=ScalingGroupStatus.ERROR.name,
        reasons=presented_reasons)
    yield Effect(UpdateGroupErrorReasons(scaling_group, presented_reasons))
    yield do_return(ConvergenceIterationStatus.Stop())
Example 27
def get_clb_contents():
    """Get Rackspace Cloud Load Balancer contents as list of `CLBNode`."""

    # If we get a CLBNotFoundError while fetching feeds, we should throw away
    # all nodes related to that load balancer, because we don't want to act on
    # data that we know is invalid/outdated (for example, if we can't fetch a
    # feed because CLB was deleted, we don't want to say that we have a node in
    # DRAINING with draining time of 0; we should just say that the node is
    # gone).

    def gone(r):
        return catch(CLBNotFoundError, lambda exc: r)

    lb_ids = [lb['id'] for lb in (yield _retry(get_clbs()))]
    node_reqs = [
        _retry(get_clb_nodes(lb_id).on(error=gone([]))) for lb_id in lb_ids
    ]
    all_nodes = yield parallel(node_reqs)
    lb_nodes = {
        lb_id: [CLBNode.from_node_json(lb_id, node) for node in nodes]
        for lb_id, nodes in zip(lb_ids, all_nodes)
    }
    draining = [
        n for n in concat(lb_nodes.values())
        if n.description.condition == CLBNodeCondition.DRAINING
    ]
    feeds = yield parallel([
        _retry(
            get_clb_node_feed(n.description.lb_id,
                              n.node_id).on(error=gone(None)))
        for n in draining
    ])
    nodes_to_feeds = dict(zip(draining, feeds))
    deleted_lbs = set([
        node.description.lb_id for (node, feed) in nodes_to_feeds.items()
        if feed is None
    ])

    def update_drained_at(node):
        feed = nodes_to_feeds.get(node)
        if node.description.lb_id in deleted_lbs:
            return None
        if feed is not None:
            return assoc_obj(node, drained_at=extract_CLB_drained_at(feed))
        else:
            return node

    nodes = map(update_drained_at, concat(lb_nodes.values()))
    yield do_return(list(filter(bool, nodes)))
Example 28
def call_if_acquired(lock, eff):
    """
    Call ``eff`` if ``lock`` is acquired. If not, try to acquire the lock
    and then call ``eff``. This function differs from
    :func:`otter.util.deferredutils.with_lock` in that it does not release
    the lock after calling ``eff``, and it allows for the lock to already
    be acquired.

    :param lock: Lock object from :obj:`TxKazooClient`
    :param eff: ``Effect`` to call if lock is/was acquired

    :return: (eff return, lock newly acquired bool) tuple. The first element
        may be ``NOT_CALLED`` if ``eff`` was not called.
    :rtype: ``Effect`` of ``tuple``
    """
    if (yield lock.is_acquired_eff()):
        ret = yield eff
        yield do_return((ret, False))
    else:
        if (yield lock.acquire_eff(False, None)):
            ret = yield eff
            yield do_return((ret, True))
        else:
            yield do_return((NOT_CALLED, False))
Example 29
def group_steps(group):
    """
    Return Effect of list of steps that would be performed on the group
    if convergence is triggered on it with desired=actual
    """
    now_dt = yield Effect(Func(datetime.utcnow))
    all_data_eff = convergence_exec_data(group["tenantId"], group["groupId"],
                                         now_dt, get_executor)
    all_data = yield Effect(TenantScope(all_data_eff, group["tenantId"]))
    (executor, scaling_group, group_state, desired_group_state,
     resources) = all_data
    desired_group_state.desired = len(resources['servers'])
    steps = executor.plan(desired_group_state, datetime_to_epoch(now_dt), 3600,
                          **resources)
    yield do_return(steps)
Example 30
def group_steps(group):
    """
    Return Effect of list of steps that would be performed on the group
    if convergence is triggered on it with desired=actual
    """
    now_dt = yield Effect(Func(datetime.utcnow))
    all_data_eff = convergence_exec_data(
        group["tenantId"], group["groupId"], now_dt, get_executor)
    all_data = yield Effect(TenantScope(all_data_eff, group["tenantId"]))
    (executor, scaling_group, group_state, desired_group_state,
     resources) = all_data
    desired_group_state.desired = len(resources['servers'])
    steps = executor.plan(desired_group_state, datetime_to_epoch(now_dt),
                          3600, **resources)
    yield do_return(steps)
Example 31
 def converge(tenant_id, group_id, dirty_flag):
     stat = yield Effect(GetStat(dirty_flag))
     # If the node disappeared, ignore it. `stat` will be None here if the
     # divergent flag was discovered only after the group is removed from
     # currently_converging, but before the divergent flag is deleted, and
     # then the deletion happens, and then our GetStat happens. This
     # basically means it happens when one convergence is starting as
     # another one for the same group is ending.
     if stat is None:
         yield msg('converge-divergent-flag-disappeared', znode=dirty_flag)
     else:
         eff = converge_one_group(currently_converging, recently_converged,
                                  waiting, tenant_id, group_id,
                                  stat.version, build_timeout,
                                  limited_retry_iterations, step_limits)
         result = yield Effect(TenantScope(eff, tenant_id))
         yield do_return(result)
Example 32
 def converge(tenant_id, group_id, dirty_flag):
     stat = yield Effect(GetStat(dirty_flag))
     # If the node disappeared, ignore it. `stat` will be None here if the
     # divergent flag was discovered only after the group is removed from
     # currently_converging, but before the divergent flag is deleted, and
     # then the deletion happens, and then our GetStat happens. This
     # basically means it happens when one convergence is starting as
     # another one for the same group is ending.
     if stat is None:
         yield msg('converge-divergent-flag-disappeared', znode=dirty_flag)
     else:
         eff = converge_one_group(currently_converging, recently_converged,
                                  waiting,
                                  tenant_id, group_id,
                                  stat.version, build_timeout,
                                  limited_retry_iterations, step_limits)
         result = yield Effect(TenantScope(eff, tenant_id))
         yield do_return(result)
Example 33
def read_entries(service_type, url, params, direction, follow_limit=100,
                 log_msg_type=None):
    """
    Read all feed entries and follow in the given direction until the
    feed is empty

    :param service_type: Service hosting the feed
    :type service_type: A member of :class:`ServiceType`
    :param str url: CF URL to append
    :param dict params: HTTP parameters
    :param direction: Where to continue fetching?
    :type direction: A member of :class:`Direction`
    :param int follow_limit: Maximum number of times to follow in given
        direction

    :return: (``list`` of :obj:`Element`, last fetched params) tuple
    """
    if direction == Direction.PREVIOUS:
        direction_link = atom.previous_link
    elif direction == Direction.NEXT:
        direction_link = atom.next_link
    else:
        raise ValueError("Invalid direction")

    if log_msg_type is not None:
        log_cb = log_success_response(log_msg_type, identity, False)
    else:
        log_cb = identity

    all_entries = []
    while follow_limit > 0:
        resp, feed_str = yield service_request(
            service_type, "GET", url, params=params,
            json_response=False).on(log_cb)
        feed = atom.parse(feed_str)
        entries = atom.entries(feed)
        if entries == []:
            break
        all_entries.extend(entries)
        link = direction_link(feed)
        if link is None:
            break
        params = parse_qs(urlparse(link).query)
        follow_limit -= 1
    yield do_return((all_entries, params))
Example 34
def convergence_failed(scaling_group, reasons, timedout=False):
    """
    Handle convergence failure
    """
    yield Effect(
        UpdateGroupStatus(scaling_group=scaling_group,
                          status=ScalingGroupStatus.ERROR))
    presented_reasons = sorted(present_reasons(reasons))
    if len(presented_reasons) == 0:
        presented_reasons = [u"Unknown error occurred"]
    elif timedout:
        presented_reasons = [
            "Timed out: {}".format(reason) for reason in presented_reasons
        ]
    yield cf_err('group-status-error',
                 status=ScalingGroupStatus.ERROR.name,
                 reasons=presented_reasons)
    yield Effect(UpdateGroupErrorReasons(scaling_group, presented_reasons))
    yield do_return(ConvergenceIterationStatus.Stop())
Example 35
def non_concurrently(locks, key, eff):
    """
    Run some Effect non-concurrently.

    :param Reference locks: A reference to a PSet that will be used to record
        which operations are currently being executed.
    :param key: the key to use for this particular operation, which will be
        stored in ``locks``
    :param Effect eff: the effect to execute.

    :return: Effect with the result of ``eff``, or an error of
        :obj:`ConcurrentError` if the given key already has an associated
        effect being performed.
    """
    if key in (yield locks.read()):
        raise ConcurrentError(key)
    yield locks.modify(lambda cc: cc.add(key))
    try:
        result = yield eff
    finally:
        yield locks.modify(lambda cc: cc.remove(key))
    yield do_return(result)
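A tiny worked example of the pyrsistent pset bookkeeping above: add and remove return new sets instead of mutating the old one, which is what lets locks.modify swap an updated set in atomically.

from pyrsistent import pset

cc = pset()
cc2 = cc.add("group-1")
assert "group-1" in cc2 and "group-1" not in cc  # original set untouched
assert "group-1" not in cc2.remove("group-1")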
Example 36
def get_clb_contents():
    """Get Rackspace Cloud Load Balancer contents as list of `CLBNode`."""
    # If we get a CLBNotFoundError while fetching feeds, we should throw away
    # all nodes related to that load balancer, because we don't want to act on
    # data that we know is invalid/outdated (for example, if we can't fetch a
    # feed because CLB was deleted, we don't want to say that we have a node in
    # DRAINING with draining time of 0; we should just say that the node is
    # gone).

    def gone(r):
        return catch(CLBNotFoundError, lambda exc: r)
    lb_ids = [lb['id'] for lb in (yield _retry(get_clbs()))]
    node_reqs = [_retry(get_clb_nodes(lb_id).on(error=gone([])))
                 for lb_id in lb_ids]
    all_nodes = yield parallel(node_reqs)
    lb_nodes = {lb_id: [CLBNode.from_node_json(lb_id, node) for node in nodes]
                for lb_id, nodes in zip(lb_ids, all_nodes)}
    draining = [n for n in concat(lb_nodes.values())
                if n.description.condition == CLBNodeCondition.DRAINING]
    feeds = yield parallel(
        [_retry(get_clb_node_feed(n.description.lb_id, n.node_id).on(
            error=gone(None)))
         for n in draining]
    )
    nodes_to_feeds = dict(zip(draining, feeds))
    deleted_lbs = set([
        node.description.lb_id
        for (node, feed) in nodes_to_feeds.items() if feed is None])

    def update_drained_at(node):
        feed = nodes_to_feeds.get(node)
        if node.description.lb_id in deleted_lbs:
            return None
        if feed is not None:
            return assoc_obj(node, drained_at=extract_CLB_drained_at(feed))
        else:
            return node
    nodes = map(update_drained_at, concat(lb_nodes.values()))
    yield do_return(list(filter(bool, nodes)))
Example 37
def get_scaling_group_servers(tenant_id, group_id, now,
                              all_as_servers=get_all_scaling_group_servers,
                              all_servers=get_all_server_details,
                              cache_class=CassScalingGroupServersCache):
    """
    Get a group's servers, taken from the cache if one exists. Updates the
    cache with newly fetched servers if it is empty.
    # NOTE: This function takes tenant_id even though the whole effect is
    # scoped on the tenant because cache calls require tenant_id. Should
    # they also not take tenant_id and work on the scope?

    :return: Servers as list of dicts
    :rtype: Effect
    """
    cache = cache_class(tenant_id, group_id)
    cached_servers, last_update = yield cache.get_servers(False)
    if last_update is None:
        servers = (yield all_as_servers()).get(group_id, [])
    else:
        current = yield all_servers()
        servers = mark_deleted_servers(cached_servers, current)
        servers = list(filter(server_of_group(group_id), servers))
    yield do_return(servers)
Example 38
def get_clb_node_feed(lb_id, node_id):
    """
    Get the atom feed associated with a CLB node.

    :param int lb_id: Cloud Load balancer ID
    :param int node_id: Node ID of the load balancer node

    :returns: Effect of ``list`` of atom entry :class:`Element`
    :rtype: ``Effect``
    """
    all_entries = []
    params = {}
    while True:
        feed_str = yield _node_feed_page(lb_id, node_id, params)
        feed = atom.parse(feed_str)
        entries = atom.entries(feed)
        if entries == []:
            break
        all_entries.extend(entries)
        next_link = atom.next_link(feed)
        if not next_link:
            break
        params = parse_qs(urlparse(next_link).query)
    yield do_return(all_entries)
Example 39
def execute_convergence(tenant_id, group_id, build_timeout, waiting,
                        limited_retry_iterations, step_limits,
                        get_executor=get_executor):
    """
    Gather data, plan a convergence, save active and pending servers to the
    group state, and then execute the convergence.

    :param str tenant_id: the tenant ID for the group to converge
    :param str group_id: the ID of the group to be converged
    :param number build_timeout: number of seconds to wait for servers to
        be in building before they are timed out and deleted
    :param Reference waiting: pmap of waiting groups
    :param int limited_retry_iterations: number of iterations to wait for
        LIMITED_RETRY steps
    :param dict step_limits: Mapping of step class to number of executions
        allowed in a convergence cycle
    :param callable get_executor: like :func:`get_executor`, used for testing.

    :return: Effect of :obj:`ConvergenceIterationStatus`.
    :raise: :obj:`NoSuchScalingGroupError` if the group doesn't exist.
    """
    clean_waiting = _clean_waiting(waiting, group_id)

    # Begin convergence by updating group status to ACTIVE
    yield msg("begin-convergence")
    try:
        yield Effect(LoadAndUpdateGroupStatus(tenant_id, group_id,
                                              ScalingGroupStatus.ACTIVE))
    except NoSuchScalingGroupError:
        # Expected for DELETING group. Ignore.
        pass

    # Gather data
    now_dt = yield Effect(Func(datetime.utcnow))
    try:
        all_data = yield msg_with_time(
            "gather-convergence-data",
            convergence_exec_data(tenant_id, group_id, now_dt,
                                  get_executor=get_executor))
        (executor, scaling_group, group_state, desired_group_state,
         resources) = all_data
    except FirstError as fe:
        if fe.exc_info[0] is NoSuchEndpoint:
            result = yield convergence_failed(
                tenant_id, group_id, [ErrorReason.Exception(fe.exc_info)])
            yield do_return(result)
        raise fe

    # prepare plan
    steps = executor.plan(desired_group_state, datetime_to_epoch(now_dt),
                          build_timeout, step_limits, **resources)
    yield log_steps(steps)

    # Execute plan
    yield msg('execute-convergence',
              steps=steps, now=now_dt, desired=desired_group_state,
              **resources)
    worst_status, reasons = yield _execute_steps(steps)

    if worst_status != StepResult.LIMITED_RETRY:
        # If we're not waiting any more, there's no point in keeping track of
        # the group
        yield clean_waiting

    # Handle the status from execution
    if worst_status == StepResult.SUCCESS:
        result = yield convergence_succeeded(
            executor, scaling_group, group_state, resources)
    elif worst_status == StepResult.FAILURE:
        result = yield convergence_failed(tenant_id, group_id, reasons)
    elif worst_status is StepResult.LIMITED_RETRY:
        # We allow further iterations to proceed as long as we haven't been
        # waiting for a LIMITED_RETRY for N consecutive iterations.
        current_iterations = (yield waiting.read()).get(group_id, 0)
        if current_iterations > limited_retry_iterations:
            yield msg('converge-limited-retry-too-long')
            yield clean_waiting
            # Prefix "Timed out" to all limited retry reasons
            result = yield convergence_failed(tenant_id, group_id, reasons,
                                              True)
        else:
            yield waiting.modify(
                lambda group_iterations:
                    group_iterations.set(group_id, current_iterations + 1))
            result = ConvergenceIterationStatus.Continue()
    else:
        result = ConvergenceIterationStatus.Continue()
    yield do_return(result)
Example 40
def challenge():
    line = None
    while line != 'To seek the Holy Grail.\n':
        yield Effect(Print('What... is your quest?'))
        line = yield Effect(Readline())
    yield do_return(line)
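A minimal sketch, not from this codebase, of how intents like Print and Readline above might be wired up and performed: each intent type gets a performer, and sync_perform drives the @do generator. The intent classes and performers here are illustrative assumptions.

from effect import (ComposedDispatcher, TypeDispatcher, base_dispatcher,
                    sync_perform, sync_performer)


class Print(object):
    """Assumed intent: write a message to stdout."""
    def __init__(self, message):
        self.message = message


class Readline(object):
    """Assumed intent: read one line from stdin."""


@sync_performer
def perform_print(dispatcher, intent):
    print(intent.message)


@sync_performer
def perform_readline(dispatcher, intent):
    # challenge() compares against a line ending in "\n", so keep it.
    return input() + "\n"  # raw_input() on Python 2


dispatcher = ComposedDispatcher([
    TypeDispatcher({Print: perform_print, Readline: perform_readline}),
    base_dispatcher])

# sync_perform(dispatcher, challenge()) would loop until the expected
# answer is typed, then return that line.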
Example 41
def increment(request):
    num = int(request.form['number'])
    yield Effect(Func(increment_counter, num))
    yield do_return(redirect(url_for('root')))
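A short sketch of the Func intent used above: Func defers an impure call until perform time, so the generator itself stays pure. The counter here is a stand-in for the real one.

from effect import Effect, Func, base_dispatcher, sync_perform
from effect.do import do, do_return

_counter = {"n": 0}


def increment_counter(num):
    _counter["n"] += num  # impure: mutates module state


@do
def bump(num):
    # The increment only happens when the effect is performed.
    yield Effect(Func(increment_counter, num))
    yield do_return(_counter["n"])


assert sync_perform(base_dispatcher, bump(3)) == 3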
Example 42
def converge_all_groups(currently_converging,
                        recently_converged,
                        waiting,
                        my_buckets,
                        all_buckets,
                        divergent_flags,
                        build_timeout,
                        interval,
                        limited_retry_iterations,
                        step_limits,
                        converge_one_group=converge_one_group):
    """
    Check for groups that need convergence and which match up to the
    buckets we've been allocated.

    :param Reference currently_converging: pset of currently converging groups
    :param Reference recently_converged: pmap of group ID to time last
        convergence finished
    :param Reference waiting: pmap of group ID to number of iterations already
        waited
    :param my_buckets: The buckets that should be checked for group IDs to
        converge on.
    :param all_buckets: The set of all buckets that can be checked for group
        IDs to converge on.  ``my_buckets`` should be a subset of this.
    :param divergent_flags: divergent flags that were found in zookeeper.
    :param number build_timeout: number of seconds to wait for servers to
        be in building before they are timed out and deleted
    :param number interval: number of seconds between attempts at convergence.
        Groups will not be converged if less than this amount of time has
        passed since the end of its last convergence.
    :param int limited_retry_iterations: number of iterations to wait for
        LIMITED_RETRY steps
    :param dict step_limits: Mapping of step class to number of executions
        allowed in a convergence cycle
    :param callable converge_one_group: function to use to converge a single
        group - to be used for test injection only
    """
    group_infos = get_my_divergent_groups(my_buckets, all_buckets,
                                          divergent_flags)
    # filter out currently converging groups
    cc = yield currently_converging.read()
    group_infos = [info for info in group_infos if info['group_id'] not in cc]
    if not group_infos:
        return
    yield msg('converge-all-groups',
              group_infos=group_infos,
              currently_converging=list(cc))

    @do
    def converge(tenant_id, group_id, dirty_flag):
        stat = yield Effect(GetStat(dirty_flag))
        # If the node disappeared, ignore it. `stat` will be None here if the
        # divergent flag was discovered only after the group is removed from
        # currently_converging, but before the divergent flag is deleted, and
        # then the deletion happens, and then our GetStat happens. This
        # basically means it happens when one convergence is starting as
        # another one for the same group is ending.
        if stat is None:
            yield msg('converge-divergent-flag-disappeared', znode=dirty_flag)
        else:
            eff = converge_one_group(currently_converging, recently_converged,
                                     waiting, tenant_id, group_id,
                                     stat.version, build_timeout,
                                     limited_retry_iterations, step_limits)
            result = yield Effect(TenantScope(eff, tenant_id))
            yield do_return(result)

    recent_groups = yield get_recently_converged_groups(
        recently_converged, interval)
    effs = []
    for info in group_infos:
        tenant_id, group_id = info['tenant_id'], info['group_id']
        if group_id in recent_groups:
            # Don't converge a group if it has recently been converged.
            continue
        eff = converge(tenant_id, group_id, info['dirty-flag'])
        effs.append(
            with_log(eff, tenant_id=tenant_id, scaling_group_id=group_id))

    yield do_return(parallel(effs))
Example 43
def root(request):
    counter = yield Effect(Func(get_counter))
    yield do_return(render_template('counter.html', counter=counter))
Example 44
def execute_convergence(tenant_id,
                        group_id,
                        build_timeout,
                        waiting,
                        limited_retry_iterations,
                        step_limits,
                        get_executor=get_executor):
    """
    Gather data, plan a convergence, save active and pending servers to the
    group state, and then execute the convergence.

    :param str tenant_id: the tenant ID for the group to converge
    :param str group_id: the ID of the group to be converged
    :param number build_timeout: number of seconds to wait for servers to
        be in building before they are timed out and deleted
    :param Reference waiting: pmap of waiting groups
    :param int limited_retry_iterations: number of iterations to wait for
        LIMITED_RETRY steps
    :param dict step_limits: Mapping of step class to number of executions
        allowed in a convergence cycle
    :param callable get_executor: like :func:`get_executor`, used for testing.

    :return: Effect of :obj:`ConvergenceIterationStatus`.
    :raise: :obj:`NoSuchScalingGroupError` if the group doesn't exist.
    """
    clean_waiting = _clean_waiting(waiting, group_id)
    # Gather data
    yield msg("begin-convergence")
    now_dt = yield Effect(Func(datetime.utcnow))
    all_data = yield msg_with_time(
        "gather-convergence-data",
        convergence_exec_data(tenant_id,
                              group_id,
                              now_dt,
                              get_executor=get_executor))
    (executor, scaling_group, group_state, desired_group_state,
     resources) = all_data

    # prepare plan
    steps = executor.plan(desired_group_state, datetime_to_epoch(now_dt),
                          build_timeout, step_limits, **resources)
    yield log_steps(steps)

    # Execute plan
    yield msg('execute-convergence',
              steps=steps,
              now=now_dt,
              desired=desired_group_state,
              **resources)
    worst_status, reasons = yield _execute_steps(steps)

    if worst_status != StepResult.LIMITED_RETRY:
        # If we're not waiting any more, there's no point in keeping track of
        # the group
        yield clean_waiting

    # Handle the status from execution
    if worst_status == StepResult.SUCCESS:
        result = yield convergence_succeeded(executor, scaling_group,
                                             group_state, resources, now_dt)
    elif worst_status == StepResult.FAILURE:
        result = yield convergence_failed(scaling_group, reasons)
    elif worst_status is StepResult.LIMITED_RETRY:
        # We allow further iterations to proceed as long as we haven't been
        # waiting for a LIMITED_RETRY for N consecutive iterations.
        current_iterations = (yield waiting.read()).get(group_id, 0)
        if current_iterations > limited_retry_iterations:
            yield msg('converge-limited-retry-too-long')
            yield clean_waiting
            # Prefix "Timed out" to all limited retry reasons
            result = yield convergence_failed(scaling_group, reasons, True)
        else:
            yield waiting.modify(lambda group_iterations: group_iterations.set(
                group_id, current_iterations + 1))
            result = ConvergenceIterationStatus.Continue()
    else:
        result = ConvergenceIterationStatus.Continue()
    yield do_return(result)
Example 45
def execute_convergence(tenant_id, group_id, build_timeout, waiting,
                        limited_retry_iterations, get_executor=get_executor):
    """
    Gather data, plan a convergence, save active and pending servers to the
    group state, and then execute the convergence.

    :param str tenant_id: the tenant ID for the group to converge
    :param str group_id: the ID of the group to be converged
    :param number build_timeout: number of seconds to wait for servers to
        be in building before they are timed out and deleted
    :param Reference waiting: pmap of waiting groups
    :param int limited_retry_iterations: number of iterations to wait for
        LIMITED_RETRY steps
    :param callable get_executor: like :func:`get_executor`, used for testing.

    :return: Effect of :obj:`ConvergenceIterationStatus`.
    :raise: :obj:`NoSuchScalingGroupError` if the group doesn't exist.
    """
    clean_waiting = _clean_waiting(waiting, group_id)
    # Gather data
    yield msg("begin-convergence")
    now_dt = yield Effect(Func(datetime.utcnow))
    all_data = yield msg_with_time(
        "gather-convergence-data",
        convergence_exec_data(tenant_id, group_id, now_dt,
                              get_executor=get_executor))
    (executor, scaling_group, group_state, desired_group_state,
     resources) = all_data

    # prepare plan
    steps = executor.plan(desired_group_state, datetime_to_epoch(now_dt),
                          build_timeout, **resources)
    yield log_steps(steps)

    # Execute plan
    yield msg('execute-convergence',
              steps=steps, now=now_dt, desired=desired_group_state,
              **resources)
    worst_status, reasons = yield _execute_steps(steps)

    if worst_status != StepResult.LIMITED_RETRY:
        # If we're not waiting any more, there's no point in keeping track of
        # the group
        yield clean_waiting

    # Handle the status from execution
    if worst_status == StepResult.SUCCESS:
        result = yield convergence_succeeded(
            executor, scaling_group, group_state, resources, now_dt)
    elif worst_status == StepResult.FAILURE:
        result = yield convergence_failed(scaling_group, reasons)
    elif worst_status == StepResult.LIMITED_RETRY:
        # We allow further iterations to proceed as long as we haven't been
        # waiting for a LIMITED_RETRY for N consecutive iterations.
        current_iterations = (yield waiting.read()).get(group_id, 0)
        if current_iterations > limited_retry_iterations:
            yield msg('converge-limited-retry-too-long')
            yield clean_waiting
            result = yield convergence_failed(scaling_group, reasons)
        else:
            yield waiting.modify(
                lambda group_iterations:
                    group_iterations.set(group_id, current_iterations + 1))
            result = ConvergenceIterationStatus.Continue()
    else:
        result = ConvergenceIterationStatus.Continue()
    yield do_return(result)
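`executor.plan` receives the current time as an epoch timestamp via
`datetime_to_epoch`. That helper is not shown in this example; assuming it
converts the naive UTC datetime produced by `datetime.utcnow()` into POSIX
seconds, a plausible sketch is:

from calendar import timegm
from datetime import datetime

def datetime_to_epoch(dt):
    # Assumption: `dt` is a naive datetime in UTC; timegm interprets
    # the struct_time as UTC, unlike time.mktime which assumes local.
    return timegm(dt.utctimetuple()) + dt.microsecond / 1e6

# e.g. datetime_to_epoch(datetime(1970, 1, 1)) == 0.0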
Example No. 50
0
def get_clb_contents():
    """
    Get Rackspace Cloud Load Balancer contents as a list of `CLBNode`. CLB
    health monitor information is also returned as a pmap of :obj:`CLB`
    objects keyed by LB ID.

    :return: Effect of (``list`` of :obj:`CLBNode`, `pmap` of :obj:`CLB`)
    :rtype: :obj:`Effect`
    """

    # If we get a CLBNotFoundError while fetching feeds, we should throw away
    # all nodes related to that load balancer, because we don't want to act on
    # data that we know is invalid/outdated (for example, if we can't fetch a
    # feed because CLB was deleted, we don't want to say that we have a node in
    # DRAINING with draining time of 0; we should just say that the node is
    # gone).

    def gone(r):
        return catch(CLBNotFoundError, lambda exc: r)

    lb_ids = [lb['id'] for lb in (yield _retry(get_clbs()))]
    node_reqs = [
        _retry(get_clb_nodes(lb_id).on(error=gone([]))) for lb_id in lb_ids
    ]
    healthmon_reqs = [
        _retry(get_clb_health_monitor(lb_id).on(error=gone(None)))
        for lb_id in lb_ids
    ]
    all_nodes_hms = yield parallel(node_reqs + healthmon_reqs)
    all_nodes, hms = all_nodes_hms[:len(lb_ids)], all_nodes_hms[len(lb_ids):]
    lb_nodes = {
        lb_id: [CLBNode.from_node_json(lb_id, node) for node in nodes]
        for lb_id, nodes in zip(lb_ids, all_nodes)
    }
    clbs = {
        str(lb_id): CLB(bool(health_mon))
        for lb_id, health_mon in zip(lb_ids, hms) if health_mon is not None
    }
    draining = [
        n for n in concat(lb_nodes.values())
        if n.description.condition == CLBNodeCondition.DRAINING
    ]
    feeds = yield parallel([
        _retry(
            get_clb_node_feed(n.description.lb_id,
                              n.node_id).on(error=gone(None)))
        for n in draining
    ])
    nodes_to_feeds = dict(zip(draining, feeds))
    deleted_lbs = {
        node.description.lb_id for (node, feed) in nodes_to_feeds.items()
        if feed is None
    }

    def update_drained_at(node):
        feed = nodes_to_feeds.get(node)
        if node.description.lb_id in deleted_lbs:
            return None
        if feed is not None:
            node.drained_at = extract_clb_drained_at(feed)
        return node

    nodes = map(update_drained_at, concat(lb_nodes.values()))
    yield do_return((list(filter(bool, nodes)),
                     pmap(keyfilter(lambda k: k not in deleted_lbs, clbs))))
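Note how the node and health-monitor requests are issued in a single
`parallel` call and the combined result list is split back by position,
which relies on `parallel` returning results in request order. In
miniature, with plain values standing in for effect results:

lb_ids = ['11111', '22222']
# parallel(node_reqs + healthmon_reqs) returns results in request order:
results = [['node-a'], ['node-b'], {'type': 'CONNECT'}, None]
all_nodes, hms = results[:len(lb_ids)], results[len(lb_ids):]
assert dict(zip(lb_ids, all_nodes)) == {'11111': ['node-a'],
                                        '22222': ['node-b']}
# A health-monitor fetch that hit CLBNotFoundError became None via
# gone(None) and gets dropped when `clbs` is built above.
assert [hm for hm in hms if hm is not None] == [{'type': 'CONNECT'}]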
Example No. 51
0
def converge_all_groups(
        currently_converging, recently_converged, waiting,
        my_buckets, all_buckets,
        divergent_flags, build_timeout, interval,
        limited_retry_iterations, step_limits,
        converge_one_group=converge_one_group):
    """
    Check for groups that need convergence and which match up to the
    buckets we've been allocated.

    :param Reference currently_converging: pset of currently converging groups
    :param Reference recently_converged: pmap of group ID to time last
        convergence finished
    :param Reference waiting: pmap of group ID to number of iterations already
        waited
    :param my_buckets: The buckets that should be checked for group IDs to
        converge on.
    :param all_buckets: The set of all buckets that can be checked for group
        IDs to converge on.  ``my_buckets`` should be a subset of this.
    :param divergent_flags: divergent flags that were found in zookeeper.
    :param number build_timeout: number of seconds to wait for servers to
        remain building before they are timed out and deleted
    :param number interval: number of seconds between attempts at convergence.
        Groups will not be converged if less than this amount of time has
        passed since the end of their last convergence.
    :param int limited_retry_iterations: number of iterations to wait for
        LIMITED_RETRY steps
    :param dict step_limits: Mapping of step class to number of executions
        allowed in a convergence cycle
    :param callable converge_one_group: function to use to converge a single
        group - to be used for test injection only
    """
    group_infos = get_my_divergent_groups(
        my_buckets, all_buckets, divergent_flags)
    # filter out currently converging groups
    cc = yield currently_converging.read()
    group_infos = [info for info in group_infos if info['group_id'] not in cc]
    if not group_infos:
        return
    yield msg('converge-all-groups', group_infos=group_infos,
              currently_converging=list(cc))

    @do
    def converge(tenant_id, group_id, dirty_flag):
        stat = yield Effect(GetStat(dirty_flag))
        # If the node disappeared, ignore it. `stat` will be None here if the
        # divergent flag was discovered only after the group is removed from
        # currently_converging, but before the divergent flag is deleted, and
        # then the deletion happens, and then our GetStat happens. This
        # basically means it happens when one convergence is starting as
        # another one for the same group is ending.
        if stat is None:
            yield msg('converge-divergent-flag-disappeared', znode=dirty_flag)
        else:
            eff = converge_one_group(currently_converging, recently_converged,
                                     waiting,
                                     tenant_id, group_id,
                                     stat.version, build_timeout,
                                     limited_retry_iterations, step_limits)
            result = yield Effect(TenantScope(eff, tenant_id))
            yield do_return(result)

    recent_groups = yield get_recently_converged_groups(recently_converged,
                                                        interval)
    effs = []
    for info in group_infos:
        tenant_id, group_id = info['tenant_id'], info['group_id']
        if group_id in recent_groups:
            # Don't converge a group if it has recently been converged.
            continue
        eff = converge(tenant_id, group_id, info['dirty-flag'])
        effs.append(
            with_log(eff, tenant_id=tenant_id, scaling_group_id=group_id))

    yield do_return(parallel(effs))
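Two filters decide which divergent groups actually converge: membership in
`currently_converging`, and having finished a convergence less than
`interval` seconds ago. A minimal sketch of that gating, with plain data in
place of the `Reference` objects (illustrative names, not otter's helpers):

def eligible_groups(group_infos, converging, recently_converged,
                    now, interval):
    # `recently_converged` maps group ID to when its last convergence
    # finished, mirroring the Reference read above.
    recent = {gid for gid, finished in recently_converged.items()
              if now - finished < interval}
    return [info for info in group_infos
            if info['group_id'] not in converging
            and info['group_id'] not in recent]

# e.g. with interval=15, a group that finished at t=100 is skipped at
# now=110 but becomes eligible again by now=120.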