Exemplo n.º 1
0
  def find_resource(self, resource_id):
    # This can only happen in the master!
    assert self.server.is_master()

    # Lineair scan through all the resources in the state to find this
    # resource.
    for r in self.wrapped_state.resource:
      if r.resource_id == resource_id:
        return r

    # The resource was not found. Find the resource template which
    # describes this resource.
    t = global_config.find_resource_template(resource_id)

    # No template found.
    if not t:
      logger.error(
          'Cannot create server state entry for resource %s (no template)' %
          resource_id)
      return None

    # We got the template. Now create a new blank resource entry in the server
    # state.
    logger.info(
        '%s creating new resource %s' %
        (self.get_server_id(), resource_id))
    r = self.wrapped_state.resource.add()
    r.template.CopyFrom(t)
    r.resource_id = resource_id

    # Calculates the time this resource went out of learning mode.
    # Note: This time may be in the past.
    r.learning_mode_expiry_time = self.wrapped_state.election_victory_time + \
      AlgorithmImpl.create(t, self.wrapped_state.server_level).get_max_lease_duration()

    if r.learning_mode_expiry_time > clock.get_time():
      logger.info(
          '%s putting resource %s in learning mode until T=%d' %
          (self.get_server_id(), resource_id,
           r.learning_mode_expiry_time))
      # Schedules an action to log a message when this resource leaves
      # learning mode.
      scheduler.add_absolute(
          r.learning_mode_expiry_time + 1, _LeaveLearningMode(),
          (self.get_server_id(), r.resource_id))

    # Note: At this point this server has not capacity lease for this resource.
    # It is up to the caller to deal with this.
    assert r.IsInitialized()

    return r
Exemplo n.º 2
0
    def find_resource(self, resource_id):
        # This can only happen in the master!
        assert self.server.is_master()

        # Lineair scan through all the resources in the state to find this
        # resource.
        for r in self.wrapped_state.resource:
            if r.resource_id == resource_id:
                return r

        # The resource was not found. Find the resource template which
        # describes this resource.
        t = global_config.find_resource_template(resource_id)

        # No template found.
        if not t:
            logger.error(
                'Cannot create server state entry for resource %s (no template)'
                % resource_id)
            return None

        # We got the template. Now create a new blank resource entry in the server
        # state.
        logger.info('%s creating new resource %s' %
                    (self.get_server_id(), resource_id))
        r = self.wrapped_state.resource.add()
        r.template.CopyFrom(t)
        r.resource_id = resource_id

        # Calculates the time this resource went out of learning mode.
        # Note: This time may be in the past.
        r.learning_mode_expiry_time = self.wrapped_state.election_victory_time + \
          AlgorithmImpl.create(t, self.wrapped_state.server_level).get_max_lease_duration()

        if r.learning_mode_expiry_time > clock.get_time():
            logger.info('%s putting resource %s in learning mode until T=%d' %
                        (self.get_server_id(), resource_id,
                         r.learning_mode_expiry_time))
            # Schedules an action to log a message when this resource leaves
            # learning mode.
            scheduler.add_absolute(r.learning_mode_expiry_time + 1,
                                   _LeaveLearningMode(),
                                   (self.get_server_id(), r.resource_id))

        # Note: At this point this server has not capacity lease for this resource.
        # It is up to the caller to deal with this.
        assert r.IsInitialized()

        return r
Exemplo n.º 3
0
    def _get_capacity(self):
        assert self.is_master()

        now = clock.get_time()

        # Assume the worst... :-)
        success = False

        # If we are server level 0, we need to get the capacity from the
        # configuration.
        if self.server_level == 0:
            for resource in self.state.all_resources():
                algo = AlgorithmImpl.create(resource.template,
                                            self.server_level)
                resource.ClearField('has')
                resource.has.CopyFrom(
                    algo.create_lease(resource, resource.template.capacity))

                # Note, we set a refresh interval here even though the capacity we get from the
                # configuration lasts forever. However by setting a refresh interval and relatively
                # short leases we ensure that configuration changes (e.g. from CDD) are
                # picked up.
                resource.has.refresh_interval *= 2

            success = True
        else:
            # If this is not the root server it gets its capacity from
            # a downstream server.
            success = self._get_capacity_downstream()

        logger.info('%s resource state after getting capacity:' %
                    self.server_id)

        for resource in self.state.all_resources():
            logger.info('resource: %s got: %lf lease: %d refresh: %d' %
                        (resource.resource_id, resource.has.capacity,
                         resource.has.expiry_time - now,
                         resource.has.refresh_interval))

        return success
Exemplo n.º 4
0
  def _get_capacity(self):
    assert self.is_master()

    now = clock.get_time()

    # Assume the worst... :-)
    success = False

    # If we are server level 0, we need to get the capacity from the
    # configuration.
    if self.server_level == 0:
      for resource in self.state.all_resources():
        algo = AlgorithmImpl.create(resource.template, self.server_level)
        resource.ClearField('has')
        resource.has.CopyFrom(
            algo.create_lease(resource, resource.template.capacity))

        # Note, we set a refresh interval here even though the capacity we get from the
        # configuration lasts forever. However by setting a refresh interval and relatively
        # short leases we ensure that configuration changes (e.g. from CDD) are
        # picked up.
        resource.has.refresh_interval *= 2

      success = True
    else:
      # If this is not the root server it gets its capacity from
      # a downstream server.
      success = self._get_capacity_downstream()

    logger.info('%s resource state after getting capacity:' % self.server_id)

    for resource in self.state.all_resources():
      logger.info(
          'resource: %s got: %lf lease: %d refresh: %d' %
          (resource.resource_id, resource.has.capacity,
           resource.has.expiry_time - now, resource.has.refresh_interval))

    return success
Exemplo n.º 5
0
    def GetCapacity_RPC(self, request):
        assert request.IsInitialized()
        assert self.state.is_initialized()

        # If this server is not the master it cannot handle this request.
        # The client should do a new Discovery.
        if not self.is_master():
            self.state.assert_clean()
            logger.info('%s getting a GetCapacity request when not master' %
                        self.server_id)
            Counter.get('server.GetCapacity_RPC.not_master').inc()

            return None

        timer = Gauge.get('server.GetCapacity_RPC.latency')
        timer.start_timer()
        logger.debug(request)
        now = clock.get_time()

        # Cleanup the state. This removes resources and clients with expired
        # leases and such.
        self.state.cleanup()

        # A set of resources that we need to skip in step 2 (the actual
        # handing out of capacity.
        resources_to_skip = set()

        # First step: Go through the request and update the state with the
        # information from the request.
        for req in request.resource:
            # Finds the resource and the client state for this resource.
            (resource,
             cr) = self.state.find_client_resource(request.client_id,
                                                   req.resource_id)

            # If this resource does not exist we don't need to do anything
            # right now.
            if resource:
                assert cr

                # Checks whether the last request from this client was at least
                # _kMinimumInterval seconds ago.
                if cr.HasField(
                        'last_request_time'
                ) and now - cr.last_request_time < _kMinimumInterval:
                    logger.warning(
                        '%s GetCapacity request for resource %s within the %d second '
                        'threshold' %
                        (self.server_id, req.resource_id, _kMinimumInterval))
                    resources_to_skip.add(req.resource_id)
                else:
                    # Updates the state with the information in the request.
                    cr.last_request_time = now
                    cr.priority = req.priority
                    cr.wants = req.wants

                    if req.HasField('has'):
                        cr.has.CopyFrom(req.has)
                    else:
                        cr.ClearField('has')

        # Creates a new response object in which we will insert the responses for
        # the resources contained in the request.
        response = GetCapacityResponse()

        # Step 2: Loop through all the individual resource requests in the request
        # and hand out capacity.
        for req in request.resource:
            # If this is a resource we need to skip, let's skip it.
            if req.resource_id in resources_to_skip:
                continue

            # Finds the resource and the client state for this resource.
            (resource,
             cr) = (self.state.find_client_resource(request.client_id,
                                                    req.resource_id))

            # Adds a response proto to the overall response.
            resp = response.response.add()
            resp.resource_id = req.resource_id

            # If this is an unknown resource just give the client whatever it
            # is asking for.
            if not resource:
                assert not cr

                logger.warning(
                    '%s GetCapacity request for unmanaged resource %s' %
                    (self.server_id, req.resource_id))
                resp.gets.expiry_time = now + _kDefaultLeaseTimeForUnknownResources
                resp.gets.capacity = req.wants
            else:
                # Sets the safe capacity in the response if there is one
                # configured for this resource.
                if resource.template.HasField('safe_capacity'):
                    resp.safe_capacity = resource.template.safe_capacity

                # Finds the algorithm implementation object for this resource.
                algo = AlgorithmImpl.create(resource.template,
                                            self.server_level)

                # If the resource is in learning mode we just return whatever the client
                # has now and create a default lease.
                if resource.learning_mode_expiry_time >= now:
                    if cr.HasField('has'):
                        has_now = cr.has.capacity
                    else:
                        has_now = 0

                    cr.has.CopyFrom(algo.create_lease(resource, has_now))
                    Counter.get('server.learning_mode_response').inc()
                else:
                    # Otherwise we just run the algorithm. This will update the
                    # client state object.
                    algo.run_client(resource, cr)
                    Counter.get('server.algorithm_runs').inc()

                # Copies the output from the algorithm run into the response.
                resp.gets.CopyFrom(cr.has)

            assert resp.IsInitialized()
            logger.info(
                '%s for %s resource: %s wants: %lf gets: %lf lease: %d refresh: %d'
                % (self.server_id, request.client_id, req.resource_id,
                   req.wants, resp.gets.capacity, resp.gets.expiry_time - now,
                   resp.gets.refresh_interval))

        assert response.IsInitialized()

        timer.stop_timer()

        return response
Exemplo n.º 6
0
  def GetCapacity_RPC(self, request):
    assert request.IsInitialized()
    assert self.state.is_initialized()

    # If this server is not the master it cannot handle this request.
    # The client should do a new Discovery.
    if not self.is_master():
      self.state.assert_clean()
      logger.info('%s getting a GetCapacity request when not master' %
                  self.server_id)
      Counter.get('server.GetCapacity_RPC.not_master').inc()

      return None

    timer = Gauge.get('server.GetCapacity_RPC.latency')
    timer.start_timer()
    logger.debug(request)
    now = clock.get_time()

    # Cleanup the state. This removes resources and clients with expired
    # leases and such.
    self.state.cleanup()

    # A set of resources that we need to skip in step 2 (the actual
    # handing out of capacity.
    resources_to_skip = set()

    # First step: Go through the request and update the state with the
    # information from the request.
    for req in request.resource:
       # Finds the resource and the client state for this resource.
      (resource, cr) = self.state.find_client_resource(
          request.client_id,
          req.resource_id)

      # If this resource does not exist we don't need to do anything
      # right now.
      if resource:
        assert cr

        # Checks whether the last request from this client was at least
        # _kMinimumInterval seconds ago.
        if cr.HasField('last_request_time') and now - cr.last_request_time < _kMinimumInterval:
          logger.warning(
              '%s GetCapacity request for resource %s within the %d second '
              'threshold' %
              (self.server_id, req.resource_id, _kMinimumInterval))
          resources_to_skip.add(req.resource_id)
        else:
          # Updates the state with the information in the request.
          cr.last_request_time = now
          cr.priority = req.priority
          cr.wants = req.wants

          if req.HasField('has'):
            cr.has.CopyFrom(req.has)
          else:
            cr.ClearField('has')

    # Creates a new response object in which we will insert the responses for
    # the resources contained in the request.
    response = GetCapacityResponse()

    # Step 2: Loop through all the individual resource requests in the request
    # and hand out capacity.
    for req in request.resource:
      # If this is a resource we need to skip, let's skip it.
      if req.resource_id in resources_to_skip:
        continue

      # Finds the resource and the client state for this resource.
      (resource, cr) = (
          self.state.find_client_resource(
              request.client_id,
              req.resource_id))

      # Adds a response proto to the overall response.
      resp = response.response.add()
      resp.resource_id = req.resource_id

      # If this is an unknown resource just give the client whatever it
      # is asking for.
      if not resource:
        assert not cr

        logger.warning(
            '%s GetCapacity request for unmanaged resource %s' %
            (self.server_id, req.resource_id))
        resp.gets.expiry_time = now + _kDefaultLeaseTimeForUnknownResources
        resp.gets.capacity = req.wants
      else:
        # Sets the safe capacity in the response if there is one
        # configured for this resource.
        if resource.template.HasField('safe_capacity'):
          resp.safe_capacity = resource.template.safe_capacity

        # Finds the algorithm implementation object for this resource.
        algo = AlgorithmImpl.create(resource.template, self.server_level)

        # If the resource is in learning mode we just return whatever the client
        # has now and create a default lease.
        if resource.learning_mode_expiry_time >= now:
          if cr.HasField('has'):
            has_now = cr.has.capacity
          else:
            has_now = 0

          cr.has.CopyFrom(algo.create_lease(resource, has_now))
          Counter.get('server.learning_mode_response').inc()
        else:
          # Otherwise we just run the algorithm. This will update the
          # client state object.
          algo.run_client(resource, cr)
          Counter.get('server.algorithm_runs').inc()

        # Copies the output from the algorithm run into the response.
        resp.gets.CopyFrom(cr.has)

      assert resp.IsInitialized()
      logger.info(
          '%s for %s resource: %s wants: %lf gets: %lf lease: %d refresh: %d' %
          (self.server_id, request.client_id, req.resource_id, req.wants,
           resp.gets.capacity, resp.gets.expiry_time - now,
           resp.gets.refresh_interval))

    assert response.IsInitialized()

    timer.stop_timer()

    return response