Example #1
    def post(self):
        # Do not run for more than 9 minutes. Exceeding the 10 minute hard
        # limit causes a 500.
        time_to_stop = time.time() + 9 * 60

        data = json.loads(self.request.body)
        start = utils.parse_datetime(data['start'])
        end = utils.parse_datetime(data['end'])
        logging.info('Deleting between %s and %s', start, end)

        triggered = 0
        total = 0
        q = model.ContentEntry.query(model.ContentEntry.expiration_ts >= start,
                                     model.ContentEntry.expiration_ts < end)
        cursor = None
        more = True
        while more and time.time() < time_to_stop:
            # Since this query does not fetch the ContentEntry entities themselves,
            # we cannot easily compute the size of the data deleted.
            keys, cursor, more = q.fetch_page(500,
                                              start_cursor=cursor,
                                              keys_only=True)
            if not keys:
                break
            total += len(keys)
            data = utils.encode_to_json([k.string_id() for k in keys])
            if utils.enqueue_task('/internal/taskqueue/cleanup/expired',
                                  'cleanup-expired',
                                  payload=data):
                triggered += 1
            else:
                logging.warning('Failed to trigger task')
        logging.info('Triggered %d tasks for %d entries', triggered, total)
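
For context, here is a minimal sketch of what the consumer of the enqueued task might look like, assuming the '/internal/taskqueue/cleanup/expired' handler simply deletes the keys listed in the payload and that ContentEntry entities are root entities keyed by string id (the real handler is not shown in this example):

import json

from google.appengine.ext import ndb


def cleanup_expired(payload):
    # 'payload' is the JSON-encoded list of ContentEntry string ids produced
    # by utils.encode_to_json() in the handler above.
    ids = json.loads(payload)
    keys = [ndb.Key('ContentEntry', entry_id) for entry_id in ids]
    # Delete in one batch; the producer already capped each batch at 500 keys.
    ndb.delete_multi(keys)
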
Example #2
def get_json(request, response, handler, resolution, description, order):
    """Returns the statistic data as a Google Visualization compatible reply.

  The reply can be either JSON or JSONP, depending on whether the header
  'X-DataSource-Auth' is set in the request.

  Note that this is not real JSON, as explained in
  developers.google.com/chart/interactive/docs/dev/implementing_data_source

  Exposes the data in the format described at
  https://developers.google.com/chart/interactive/docs/reference#dataparam
  and
  https://developers.google.com/chart/interactive/docs/querylanguage

  Arguments:
  - request: A webapp2.Request.
  - response: A webapp2.Response.
  - handler: A StatisticsFramework.
  - resolution: One of 'days', 'hours' or 'minutes'.
  - description: Dict describing the columns.
  - order: List describing the order to use for the columns.

  Raises:
    ValueError if a 400 should be returned.
  """
    tqx_args = process_tqx(request.params.get('tqx', ''))
    duration = utils.get_request_as_int(request, 'duration', 120, 1, 256)
    now = None
    now_text = request.params.get('now')
    if now_text:
        now = utils.parse_datetime(now_text)

    table = stats_framework.get_stats(handler, resolution, now, duration, True)
    return get_json_raw(request, response, table, description, order, tqx_args)
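
The 'tqx' parameter consumed by process_tqx() follows the Google Visualization wire protocol: a semicolon-separated list of colon-delimited key:value pairs. A hedged sketch of such a parser (the real process_tqx() is not part of this example and may differ):

def parse_tqx(tqx):
    # 'tqx' looks like 'reqId:0;out:json'; split it into a dict of options.
    out = {}
    for pair in tqx.split(';'):
        if ':' in pair:
            key, _, value = pair.partition(':')
            out[key] = value
    return out

# parse_tqx('reqId:1;out:json') == {'reqId': '1', 'out': 'json'}
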
Example #3
def rebuild_task_cache_async(payload):
    """Rebuilds the TaskDimensions cache.

  This function is called in two cases:
  - A new kind of task request dimensions never seen before
  - The TaskDimensions.valid_until_ts expired

  It is a cache miss: query all the bots and check which ones can run the
  task.

  Warning: There's a race condition where the TaskDimensions query could be
  missing some instances due to the eventual consistency of the BotInfo query.
  This only happens when new request dimensions are set AND a bot that can run
  this task recently showed up.

  Runtime expectation: scales with the number of bots that can run the task,
  via BotInfo.dimensions_flat filtering. As there can be tens of thousands of
  such bots, storing all the entities for a new kind of request can take a
  long time, so this must be called from the backend.

  Arguments:
  - payload: dict as created in assert_task_async() with:
    - 'dimensions': dict of task dimensions to refresh
    - 'dimensions_hash': precalculated hash for dimensions
    - 'valid_until_ts': expiration_ts + _EXTEND_VALIDITY for how long this cache
      is valid

  Returns:
    True if everything was processed, False if it needs to be retried.
  """
    data = json.loads(payload)
    logging.debug('rebuild_task_cache(%s)', data)
    task_dimensions = data[u'dimensions']
    task_dimensions_hash = int(data[u'dimensions_hash'])
    valid_until_ts = utils.parse_datetime(data[u'valid_until_ts'])
    task_dimensions_key = _get_task_dimensions_key(task_dimensions_hash,
                                                   task_dimensions)
    now = utils.utcnow()

    expanded_task_dimensions_flats = expand_dimensions_to_flats(
        task_dimensions)
    try:
        yield [
            _refresh_all_BotTaskDimensions_async(now, valid_until_ts, df,
                                                 task_dimensions_hash)
            for df in expanded_task_dimensions_flats
        ]
        # Done updating, now store the entity. Must use a transaction as there
        # could be other dimensions set in the entity.
        yield _refresh_TaskDimensions_async(now, valid_until_ts,
                                            expanded_task_dimensions_flats,
                                            task_dimensions_key)
    finally:
        # Any of the calls above could throw. Log how far along we got.
        duration = (utils.utcnow() - now).total_seconds()
        logging.debug(
            'rebuild_task_cache(%d) in %.3fs\n%s\ndimensions_flat size=%d',
            task_dimensions_hash, duration, task_dimensions,
            len(expanded_task_dimensions_flats))
    raise ndb.Return(True)
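
rebuild_task_cache_async() follows the ndb tasklet protocol: yielding a list of futures waits for all of them concurrently, and raise ndb.Return(...) delivers the result (the @ndb.tasklet decorator is presumably applied at the definition site, outside this excerpt). A minimal standalone sketch of the same pattern, with illustrative names:

from google.appengine.ext import ndb


@ndb.tasklet
def count_both_async(query_a, query_b):
    # Yielding a list of futures runs both count RPCs concurrently and
    # resumes the tasklet once every future has a result.
    count_a, count_b = yield [query_a.count_async(), query_b.count_async()]
    raise ndb.Return(count_a + count_b)

# Caller: total = count_both_async(q1, q2).get_result()
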
Example #4
def rebuild_task_cache(payload):
    """Rebuilds the TaskDimensions cache.

  This function is called in two cases:
  - A new kind of task request dimensions never seen before
  - The TaskDimensions.valid_until_ts expired

  It is a cache miss: query all the bots and check which ones can run the
  task.

  Warning: There's a race condition where the TaskDimensions query could be
  missing some instances due to the eventual consistency of the BotInfo query.
  This only happens when new request dimensions are set AND a bot that can run
  this task recently showed up.

  Runtime expectation: scales with the number of bots that can run the task,
  via BotInfo.dimensions_flat filtering. As there can be tens of thousands of
  such bots, storing all the entities for a new kind of request can take a
  long time, so this must be called from the backend.

  Arguments:
  - payload: dict as created in assert_task() with:
    - 'dimensions': dict of task dimensions to refresh
    - 'dimensions_hash': precalculated hash for dimensions
    - 'valid_until_ts': expiration_ts + _ADVANCE for how long this cache is
      valid

  Returns:
    True if everything was processed, False if it needs to be retried.
  """
    data = json.loads(payload)
    logging.debug('rebuild_task_cache(%s)', data)
    dimensions = data[u'dimensions']
    dimensions_hash = int(data[u'dimensions_hash'])
    valid_until_ts = utils.parse_datetime(data[u'valid_until_ts'])
    dimensions_flat = []
    for k, values in dimensions.iteritems():
        for v in values:
            dimensions_flat.append(u'%s:%s' % (k, v))
    dimensions_flat.sort()

    now = utils.utcnow()
    updated = 0
    viable = 0
    try:
        pending = set()
        for bot_task_key in _yield_BotTaskDimensions_keys(
                dimensions_hash, dimensions_flat):
            viable += 1
            future = _refresh_BotTaskDimensions(bot_task_key, dimensions_flat,
                                                now, valid_until_ts)
            pending.add(future)
            updated += sum(1 for i in _cap_futures(pending) if i)
        updated += sum(1 for i in _flush_futures(pending) if i)

        # Done updating, now store the entity. Must use a transaction as there could
        # be other dimensions set in the entity.
        task_dims_key = _get_task_dims_key(dimensions_hash, dimensions)

        def run():
            obj = task_dims_key.get()
            if not obj:
                obj = TaskDimensions(key=task_dims_key)
            if obj.assert_request(now, valid_until_ts, dimensions_flat):
                obj.put()
            return obj

        try:
            # Retry often. This transaction tends to fail frequently, and this is
            # running from a task queue, so it's fine if it takes more time;
            # success is more important.
            datastore_utils.transaction(run, retries=4)
        except datastore_utils.CommitError as e:
            # Still log an error but no need for a stack trace in the logs. It is
            # important to surface that the call failed so the task queue is retried
            # later.
            logging.error('Failed updating TaskDimensions: %s', e)
            return False
    finally:
        # Any of the _refresh_BotTaskDimensions() calls above could throw. Still log
        # how far we went.
        logging.debug(
            'rebuild_task_cache(%d) in %.3fs. viable bots: %d; bots updated: %d\n'
            '%s', dimensions_hash, (utils.utcnow() - now).total_seconds(),
            viable, updated, '\n'.join('  ' + d for d in dimensions_flat))
    return True
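
A hedged sketch of the bounded-concurrency idea behind _cap_futures() and _flush_futures() (the real helpers are not included in this example and may differ): keep at most a fixed number of ndb futures in flight and harvest results as futures complete.

from google.appengine.ext import ndb

MAX_IN_FLIGHT = 50  # illustrative cap, not necessarily the value used above


def cap_futures(pending):
    # 'pending' is a set of ndb.Future. While too many are in flight, wait
    # for progress, drop completed futures from the set and collect results.
    results = []
    while len(pending) > MAX_IN_FLIGHT:
        ndb.Future.wait_any(pending)
        done = {f for f in pending if f.done()}
        pending -= done
        results.extend(f.get_result() for f in done)
    return results


def flush_futures(pending):
    # Wait for everything still in flight and return all remaining results.
    return [f.get_result() for f in pending]
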
Example #5
def rebuild_task_cache(payload):
  """Rebuilds the TaskDimensions cache.

  This function is called in two cases:
  - A new kind of task request dimensions never seen before
  - The TaskDimensions.valid_until_ts expired

  It is a cache miss: query all the bots and check which ones can run the
  task.

  Warning: There's a race condition where the TaskDimensions query could be
  missing some instances due to the eventual consistency of the BotInfo query.
  This only happens when new request dimensions are set AND a bot that can run
  this task recently showed up.

  Runtime expectation: scales with the number of bots that can run the task,
  via BotInfo.dimensions_flat filtering. As there can be tens of thousands of
  such bots, storing all the entities for a new kind of request can take a
  long time, so this must be called from the backend.

  Arguments:
  - payload: dict as created in assert_task() with:
    - 'dimensions': dict of task dimensions to refresh
    - 'dimensions_hash': precalculated hash for dimensions
    - 'valid_until_ts': expiration_ts + _EXTEND_VALIDITY for how long this cache
      is valid

  Returns:
    True if everything was processed, False if it needs to be retried.
  """
  data = json.loads(payload)
  logging.debug('rebuild_task_cache(%s)', data)
  dimensions = data[u'dimensions']
  dimensions_hash = int(data[u'dimensions_hash'])
  valid_until_ts = utils.parse_datetime(data[u'valid_until_ts'])
  dimensions_flat = []
  for k, values in dimensions.iteritems():
    for v in values:
      dimensions_flat.append(u'%s:%s' % (k, v))
  dimensions_flat.sort()

  now = utils.utcnow()
  # Number of BotTaskDimensions entities that were created/updated in the DB.
  updated = 0
  # Number of BotTaskDimensions entities that matched this task queue.
  viable = 0
  try:
    pending = []
    for bot_task_key in _yield_BotTaskDimensions_keys(
        dimensions_hash, dimensions_flat):
      viable += 1
      future = _refresh_BotTaskDimensions(
          bot_task_key, dimensions_flat, now, valid_until_ts)
      pending.append(future)
      done, pending = _cap_futures(pending)
      updated += sum(1 for i in done if i)
    updated += sum(1 for i in _flush_futures(pending) if i)
    # The main reason for this log entry is to confirm the timing of the first
    # part (updating BotTaskDimensions) versus the second part (updating
    # TaskDimensions).
    logging.debug('Updated %d BotTaskDimensions', updated)

    # Done updating, now store the entity. Must use a transaction as there could
    # be other dimensions set in the entity.
    task_dims_key = _get_task_dims_key(dimensions_hash, dimensions)

    # First do a dry run. If the dry run passes, skip the transaction.
    #
    # The rationale is that there can be concurrent triggers of this task queue
    # (rebuild-cache) when there are concurrent task creations. The dry run costs
    # little overhead, and if it passes, it avoids transaction contention.
    #
    # Transaction contention can be problematic on pools with a high cardinality
    # of dimension sets.
    obj = task_dims_key.get()
    if not obj or obj.assert_request(now, valid_until_ts, dimensions_flat):
      def _run():
        action = None
        obj = task_dims_key.get()
        if not obj:
          obj = TaskDimensions(key=task_dims_key)
          action = 'created'
        if obj.assert_request(now, valid_until_ts, dimensions_flat):
          if not action:
            action = 'updated'
          if not obj.sets:
            obj.key.delete()
            return 'deleted'
          obj.put()
        return action

      # Do an ad hoc transaction instead of using datastore_utils.transaction().
      # This is because for some pools, the transaction rate may be so high that
      # it's impossible to get good performance on the entity group.
      #
      # In practice the odds of a conflict are ~nil, because it can only conflict
      # if a TaskDimensions.set has more than one item, which happens only when
      # there's a hash conflict (odds of 1 in 2^31) plus two concurrent tasks
      # running simultaneously (over the _EXTEND_VALIDITY period), so we can do
      # it in a more ad hoc way.
      key = '%s:%s' % (
          task_dims_key.parent().string_id(), task_dims_key.string_id())
      if not memcache.add(key, True, time=60, namespace='task_queues_tx'):
        # add() returns True if the entry was added, False otherwise. That's
        # perfect.
        logging.warning('Failed taking pseudo-lock for %s; reenqueuing', key)
        return False
      try:
        action = _run()
      finally:
        memcache.delete(key, namespace='task_queues_tx')

      # Keeping this dead code for now, in case we find a solution for the
      # transaction rate issue.
      #try:
      #  action = datastore_utils.transaction(_run, retries=4)
      #except datastore_utils.CommitError as e:
      #  # Still log an error but no need for a stack trace in the logs. It is
      #  # important to surface that the call failed so the task queue is
      #  # retried later.
      #  logging.warning('Failed updating TaskDimensions: %s; reenqueuing', e)
      #  return False

      if action:
        # Only log at info level when something was done. This helps when
        # scanning the logs quickly.
        logging.info('Did %s', action)
      else:
        logging.debug('Did nothing')
    else:
      logging.debug('Skipped transaction!')
  finally:
    # Any of the _refresh_BotTaskDimensions() calls above could throw. Still log
    # how far we went.
    msg = (
      'rebuild_task_cache(%d) in %.3fs. viable bots: %d; bots updated: %d\n%s')
    dims = '\n'.join('  ' + d for d in dimensions_flat)
    duration = (utils.utcnow()-now).total_seconds()
    # Only log at info level when something was done. This helps when
    # scanning the logs quickly.
    if updated:
      logging.info(msg, dimensions_hash, duration, viable, updated, dims)
    else:
      logging.debug(msg, dimensions_hash, duration, viable, updated, dims)
  return True
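
The memcache.add() call above works as a best-effort mutex: add() is atomic and returns False when the key already exists, and the 60s TTL bounds how long a crashed holder can block others. A minimal sketch of that pattern in isolation (names are illustrative, not part of the original code):

from google.appengine.api import memcache


def with_pseudo_lock(key, ttl_sec, callback):
  # add() only succeeds if the key does not exist yet, so at most one caller
  # gets past this point until the key expires or is deleted.
  if not memcache.add(key, True, time=ttl_sec, namespace='task_queues_tx'):
    return False  # someone else holds the lock; ask the caller to retry later
  try:
    callback()
  finally:
    memcache.delete(key, namespace='task_queues_tx')
  return True
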