Пример #1
0
def _SetHostRecoveryState(hostname, recovery_state, assignee=None):
  """Set host's recovery state."""
  host = GetHost(hostname)
  if not host:
    logging.error("Host %s doesn't exist.", hostname)
    return
  host.assignee = assignee
  entities_to_update = []
  if recovery_state == common.RecoveryState.VERIFIED:
    host.recovery_state = common.RecoveryState.VERIFIED
    host.assignee = host.assignee
    host.last_recovery_time = common.Now()
    host.timestamp = common.Now()
    entities_to_update.append(_CreateHostInfoHistory(host))
    devices = GetDevicesOnHost(hostname)
    for device in devices:
      if (device.recovery_state and
          device.recovery_state != common.RecoveryState.UNKNOWN):
        entities_to_update.extend(
            _BuildDeviceRecoveryState(device, common.RecoveryState.VERIFIED))
    # After it's verified, it goes into a state as no-one is recovering it.
    host.recovery_state = common.RecoveryState.UNKNOWN
    host.assignee = None
  else:
    host.recovery_state = recovery_state
  host.last_recovery_time = common.Now()
  host.timestamp = common.Now()
  entities_to_update.append(_CreateHostInfoHistory(host))
  entities_to_update.append(host)
  ndb.put_multi(entities_to_update)
Пример #2
0
def AddToSyncQueue(command):
    """Add a command to the sync queue."""
    _, request_id, _, command_id = command.key.flat()
    payload = json.dumps({
        command_manager.COMMAND_ID_KEY: command_id,
        command_manager.REQUEST_ID_KEY: request_id,
    })
    now = common.Now()
    update_time = command.update_time or now
    timeout_seconds = GetCommandQueueTimeoutSeconds(command)
    if command.state != common.CommandState.QUEUED:
        # Schedule next sync based on when attempt may be cancelled.
        next_sync = now + datetime.timedelta(
            minutes=MAX_COMMAND_EVENT_DELAY_MIN)
    else:
        next_sync = min(
            # Time to next ensure command is leasable
            now + datetime.timedelta(minutes=MAX_COMMAND_EVENT_DELAY_MIN),
            # Time to cancel command
            update_time + datetime.timedelta(seconds=timeout_seconds))
    logging.info('Scheduling request [%s] command [%s] to be synced at %s',
                 request_id, command_id, next_sync)
    task_scheduler.AddTask(queue_name=COMMAND_SYNC_QUEUE,
                           payload=payload,
                           eta=next_sync)
Пример #3
0
def _BuildDeviceRecoveryState(device, recovery_state):
  """Build device's recovery state and its history."""
  entities_to_update = []
  if recovery_state == common.RecoveryState.VERIFIED:
    device.recovery_state = common.RecoveryState.VERIFIED
    device.last_recovery_time = common.Now()
    device.timestamp = common.Now()
    entities_to_update.append(_CreateDeviceInfoHistory(device))
    device.recovery_state = common.RecoveryState.UNKNOWN
  else:
    device.recovery_state = recovery_state
  device.last_recovery_time = common.Now()
  device.timestamp = common.Now()
  entities_to_update.append(_CreateDeviceInfoHistory(device))
  entities_to_update.append(device)
  return entities_to_update
Пример #4
0
def UpdateGoneHost(hostname):
  """Set a host and its devices to GONE."""
  logging.info("Set host %s and its devices to GONE.", hostname)
  host = GetHost(hostname)
  if host.host_state == api_messages.HostState.GONE:
    logging.info("Host %s is already GONE.", hostname)
    return
  entities_to_update = []
  now = common.Now()
  host_state_history, host_history = _UpdateHostState(
      host, api_messages.HostState.GONE, now)
  entities_to_update.append(host)
  if host_state_history:
    entities_to_update.append(host_state_history)
  if host_history:
    entities_to_update.append(host_history)
  devices = GetDevicesOnHost(hostname)
  for device in devices or []:
    if device.state == common.DeviceState.GONE:
      continue
    logging.debug("Set device %s to GONE.", device.device_serial)
    device_state_history, device_history = _UpdateDeviceState(
        device, common.DeviceState.GONE, now)
    entities_to_update.append(device)
    if device_state_history:
      entities_to_update.append(device_state_history)
    if device_history:
      entities_to_update.append(device_history)
  _DoCountDeviceForHost(host, devices)
  ndb.put_multi(entities_to_update)
def CreateTask(command_task_args):
    """Save the command task in datastore.

  Args:
    command_task_args: a CommandTaskArgs object
  Returns:
    true if created a new task, false if the task already exists.
  """
    test_bench = _GetTestBench(command_task_args.cluster,
                               command_task_args.run_target)

    task = datastore_entities.CommandTask(
        request_id=str(command_task_args.request_id),
        command_id=str(command_task_args.command_id),
        task_id=command_task_args.task_id,
        plugin_data=command_task_args.plugin_data,
        lease_count=0,
        command_line=command_task_args.command_line,
        run_count=command_task_args.run_count,
        run_index=command_task_args.run_index,
        attempt_index=command_task_args.attempt_index,
        shard_count=command_task_args.shard_count,
        shard_index=command_task_args.shard_index,
        test_bench=test_bench,
        priority=command_task_args.priority or 0,
        leasable=True,
        request_type=command_task_args.request_type,
        schedule_timestamp=common.Now())
    return _DoCreateTask(task)
def HandleCommandEvent():
  """Process a command event message in COMMAND_EVENT_QUEUE."""
  payload = flask.request.get_data()
  try:
    payload = zlib.decompress(payload)
  except zlib.error:
    logging.warning("payload may not be compressed: %s", payload, exc_info=True)
  objs = json.loads(payload)
  # To handle non-batched objects.
  if not isinstance(objs, list):
    objs = [objs]
  failed_objs = []
  exception = None
  for obj in objs:
    try:
      logging.info(_Truncate(obj))
      event = command_event.CommandEvent(**obj)
      if (event.time + datetime.timedelta(days=COMMAND_EVENT_TIMEOUT_DAYS) <
          common.Now()):
        logging.warning("Ignore event retried for %d days:\n%s",
                        COMMAND_EVENT_TIMEOUT_DAYS, event)
        continue
      if event.attempt_state == common.CommandState.UNKNOWN:
        logging.warning("Ignore unknown state event:\n%s.", event)
        continue
      ProcessCommandEvent(event)
    except Exception as e:        exception = e
      logging.warning(
          "Failed to process event, will retry: obj=%s", _Truncate(obj),
          exc_info=True)
      # failed events will be retried later.
      failed_objs.append(obj)
Пример #7
0
def _DoRestoreDevice(device_serial, hostname):
  """Actually restore the device.

  This need to run in a separate transaction otherwise _CountDeviceForHost
  doesn't work, since it will count device before the restore is committed.
  Both device serial and hostname are required since transactional
  only works for ancestor query.

  Args:
    device_serial: device's serial
    hostname: device hostname
  Returns:
    the DeviceInfo entity.
  """
  device = ndb.Key(
      datastore_entities.HostInfo, hostname,
      datastore_entities.DeviceInfo, device_serial).get()
  if not device:
    return None
  if not device.hidden:
    logging.info("Device %s %s is not hidden.", device_serial, hostname)
    return device
  device.hidden = False
  device.timestamp = common.Now()
  device.put()
  return device
Пример #8
0
def StartHostSync(hostname, current_host_sync_id=None):
  """Start host sync.

  Start host sync, if there is no host sync task or the host sync task is old.

  Args:
    hostname: hostname
    current_host_sync_id: unique id for current host sync that trigger
      this add back.
  Returns:
    the new host_sync_id or None if not added
  """
  host_sync = datastore_entities.HostSync.get_by_id(hostname)
  now = common.Now()
  stale_time = common.Now() - HOST_SYNC_STALE_TIMEOUT
  if (host_sync and host_sync.host_sync_id and
      host_sync.host_sync_id != current_host_sync_id and
      host_sync.update_timestamp and
      host_sync.update_timestamp >= stale_time):
    logging.debug(
        "Another host sync %s is already scheduled.",
        host_sync.host_sync_id)
    return None
  if not host_sync:
    host_sync = datastore_entities.HostSync(id=hostname)
  elif (host_sync.update_timestamp and
        host_sync.update_timestamp < stale_time):
    logging.info(
        "The old sync %s is inactive since %s.",
        host_sync.host_sync_id, host_sync.update_timestamp)
  host_sync.host_sync_id = uuid.uuid4().hex
  payload = json.dumps({
      HOSTNAME_KEY: hostname,
      HOST_SYNC_ID_KEY: host_sync.host_sync_id,
  })
  task = task_scheduler.AddTask(
      queue_name=HOST_SYNC_QUEUE,
      payload=payload,
      eta=common.Now() + HOST_SYNC_INTERVAL)
  # the taskname is for debugging purpose.
  host_sync.taskname = task.name
  host_sync.update_timestamp = now
  host_sync.put()
  logging.debug("Host will sync by task %s with host_sync_id %s.",
                task.name, host_sync.host_sync_id)
  return host_sync.host_sync_id
Пример #9
0
def _AssignHost(hostname, assignee):
  host = GetHost(hostname)
  if not host:
    logging.error("Host %s doesn't exist.", hostname)
    return
  host.assignee = assignee
  if not assignee:
    host.last_recovery_time = common.Now()
  host.put()
Пример #10
0
def _UpdateSyncStatus(request_id):
  """Update the RequestSyncStatus in a transaction.

  Args:
    request_id: The request ID as a string

  Returns:
    True if the request should be synced. False otherwise.

  Raises:
    RequestSyncStatusNotFoundError: the a RequestSyncStatus is not found for the
      given request.
  """
  sync_status_key = GetRequestSyncStatusKey(request_id)
  sync_status = sync_status_key.get()

  if not sync_status:
    # This should not happen. If it does, that would mean that put() operation
    # in Monitor() failed after adding the request which would have caused
    # CreateRequest to also fail.
    raise RequestSyncStatusNotFoundError('No RequestSyncStatus found for: %s' %
                                         request_id)

  should_sync = False
  if sync_status.has_new_command_events:
    logging.info('Request %s has new command events.', request_id)
    sync_status.has_new_command_events = False
    should_sync = True
  elif not sync_status.last_sync_time:
    logging.info('Request %s does not have a last sync time.', request_id)
    should_sync = True
  elif (datetime.timedelta(seconds=FORCE_REQUEST_SYNC_SECS) <
        (common.Now() - sync_status.last_sync_time)):
    logging.info('Request %s was last synced on %s.', request_id,
                 sync_status.last_sync_time)
    should_sync = True

  if should_sync:
    sync_status.last_sync_time = common.Now()
    sync_status.put()

  return should_sync
Пример #11
0
def _AddRequestToQueue(request_id, countdown_secs=LONG_SYNC_COUNTDOWN_SECS):
  """Add a request to the sync queue."""
  payload = json.dumps({
      REQUEST_ID_KEY: request_id,
  })

  next_sync = common.Now() + datetime.timedelta(seconds=countdown_secs)
  logging.debug('Queueing request %s to be synced at %s', request_id, next_sync)
  task = task_scheduler.AddTask(
      queue_name=REQUEST_SYNC_QUEUE, payload=payload, eta=next_sync)
  logging.debug('Queued task: %s', task)
Пример #12
0
def StopHostSync(hostname, current_host_sync_id):
  """Stop sync the host."""
  host_sync = datastore_entities.HostSync.get_by_id(hostname)
  stale_time = common.Now() - HOST_SYNC_STALE_TIMEOUT
  if not host_sync:
    logging.info("No host sync for %s.", hostname)
    return
  if (current_host_sync_id != host_sync.host_sync_id and
      host_sync.update_timestamp >= stale_time):
    logging.debug(
        "Another host sync %s is already scheduled.",
        host_sync.host_sync_id)
    return
  logging.debug("Stop host sync for %s.", hostname)
  host_sync.key.delete()
def LeaseTask(task_id):
    """Lease a task.

  Args:
    task_id: the task id
  Returns:
    None if the task is not leasable, otherwise the task.
  """
    task = _Key(task_id).get()
    if not task or not task.leasable:
        return None
    task.leasable = False
    task.lease_timestamp = common.Now()
    task.lease_count += 1
    task.put()
    return task
Пример #14
0
def CalculateDeviceUtilization(device_serial, days=7):
  """Calculates the device utilization rate over a number of days.

  Device utilization is defined as the time a device has been in allocated
  state over the given number of days.

  Args:
    device_serial: Serial for the device to calculate utilization for.
    days: Number of days to calculate utilization. Defaults to 7.
  Returns:
    A number representing the percent of time the device has been allocated.
  Raises:
    ValueError: if the given days are invalid
  """
  if days <= 0:
    raise ValueError("Number of days [%d] should be > 0" % (days))
  now = common.Now()
  requested_time = datetime.timedelta(days=days)
  start_date = now - requested_time

  query = datastore_entities.DeviceStateHistory.query()
  query = query.filter(
      datastore_entities.DeviceStateHistory.device_serial == device_serial)
  query = query.filter(
      datastore_entities.DeviceStateHistory.timestamp >= start_date)
  query = query.order(datastore_entities.DeviceStateHistory.timestamp)

  allocated_time = datetime.timedelta()
  record_start_time = None
  # If the device was allocated before the given days and continued to be,
  # it will miss that allocation time as part of this calculation.
  for record in query.iter():
    # May take a while depending how often this device changes states.
    if record.state == "Allocated":
      if not record_start_time:
        record_start_time = record.timestamp
    elif record_start_time:
      allocated_time += record.timestamp - record_start_time
      record_start_time = None

  if record_start_time:
    # Last known state was allocated
    allocated_time += now - record_start_time

  total_seconds = requested_time.total_seconds()
  allocated_seconds = allocated_time.total_seconds()
  return float(allocated_seconds) / float(total_seconds)
Пример #15
0
def RestoreHost(hostname):
  """Restore a host and its devices."""
  logging.info("Restore host %s.", hostname)
  host = GetHost(hostname)
  if not host:
    return None
  if not host.hidden:
    logging.info("Host %s is not hidden.", hostname)
    return host
  now = common.Now()
  entities_to_update = []
  host.hidden = False
  host.timestamp = now
  entities_to_update.append(host)
  host.put()
  # We do not restore device for the host, since if devices are still
  # on the host it should report in next host event.
  return host
Пример #16
0
def CreateAndSaveDeviceInfoHistoryFromDeviceNote(device_serial, note_id):
  """Create and save DeviceInfoHistory from a DeviceNote.

  This method obtains current DeviceInfo based on device_serial, and create a
  DeviceInfoHistory with DeviceNote id in extra_info, then save to datastore.

  Args:
    device_serial: string, serial number of a lab device.
    note_id: int, the id of a DeviceNote.

  Returns:
    An instance of ndb.Key, the key of DeviceInfoHistory entity.
  """
  device = GetDevice(device_serial=device_serial)
  device.timestamp = common.Now()
  device_info_history = _CreateDeviceInfoHistory(device)
  if device_info_history.extra_info is None:
    device_info_history.extra_info = {}
  device_info_history.extra_info[DEVICE_NOTE_ID_KEY] = note_id
  key = device_info_history.put()
  return key
Пример #17
0
def CreateAndSaveHostInfoHistoryFromHostNote(hostname, note_id):
  """Create and save HostInfoHistory from a HostNote.

  This method obtains current HostInfo based on hostname, and create a
  HostInfoHistory with HosteNote id in extra_info, then save to datastore.

  Args:
    hostname: string, name of a lab host.
    note_id: int, the id of a HostNote.

  Returns:
    An instance of ndb.Key, the key of HostInfoHistory entity.
  """
  host = GetHost(hostname=hostname)
  host.timestamp = common.Now()
  host_info_history = _CreateHostInfoHistory(host)
  if host_info_history.extra_info is None:
    host_info_history.extra_info = {}
  host_info_history.extra_info[HOST_NOTE_ID_KEY] = note_id
  key = host_info_history.put()
  return key
def RescheduleTask(task_id, run_index, attempt_index):
    """Reschedule the command task.

  Args:
    task_id: the task's id
    run_index: the new run_index
    attempt_index: the new attempt index
  """
    task = _Key(task_id).get()

    if task:
        if task.leasable:
            # Ignore leasable tasks
            logging.info('%s is leasable, don\'t reschedule', str(task_id))
            return
        task.leasable = True
        task.schedule_timestamp = common.Now()
        task.run_index = run_index
        task.attempt_index = attempt_index
        task.put()
    else:
        logging.warning('%s doesn\'t exist in task store, don\'t reschedule',
                        str(task_id))
Пример #19
0
def HideHost(hostname):
  """Hide a host and its devices."""
  logging.info("Hide host %s.", hostname)
  host = GetHost(hostname)
  if not host:
    return None
  if host.hidden:
    logging.info("Host %s is already hidden.", hostname)
    return host
  now = common.Now()
  entities_to_update = []
  host.hidden = True
  host.timestamp = now
  entities_to_update.append(host)
  devices = GetDevicesOnHost(hostname)
  for device in devices or []:
    if device.hidden:
      continue
    logging.debug("Hide device %s.", device.device_serial)
    device.hidden = True
    device.timestamp = now
    entities_to_update.append(device)
  ndb.put_multi(entities_to_update)
  return host
Пример #20
0
def _DoCountDeviceForHost(host, devices):
  """Actually count devices for a host."""
  if not host:
    return
  if not devices:
    logging.info("No devices reported for host [%s]", host.hostname)
    if not host.total_devices and not host.device_count_summaries:
      return
    # If there is no devices but the total_devices is not 0, we need to clear
    # the count.
  now = common.Now()
  host.total_devices = 0
  host.offline_devices = 0
  host.available_devices = 0
  host.allocated_devices = 0
  device_counts = {}
  for device in devices or []:
    device_count = device_counts.get(device.run_target)
    if not device_count:
      device_count = datastore_entities.DeviceCountSummary(
          run_target=device.run_target,
          timestamp=now)
      device_counts[device.run_target] = device_count
    device_count.total += 1
    host.total_devices += 1
    if device.state in common.DEVICE_AVAILABLE_STATES:
      host.available_devices += 1
      device_count.available += 1
    elif device.state in common.DEVICE_ALLOCATED_STATES:
      host.allocated_devices += 1
      device_count.allocated += 1
    else:
      host.offline_devices += 1
      device_count.offline += 1
  host.device_count_timestamp = now
  host.device_count_summaries = list(device_counts.values())
Пример #21
0
  def _LeaseHostTasksForCluster(
      self, matcher, cluster, host, num_tasks=None):
    leased_tasks = []
    leasable_tasks = command_task_store.GetLeasableTasks(
        cluster, matcher.GetRunTargets())
    for task in leasable_tasks:
      matched_devices = matcher.Match(task)
      if not matched_devices:
        continue
      try:
        # b/27136167: Touch command to prevent coordinator from cancelling
        # during task lease.
        command = command_manager.Touch(task.request_id, task.command_id)
        # task comes from GetLeasableTasks is eventual consistent.
        # LeaseTask return the strong consistent task, so the data is more
        # accurate.
        leased_task = command_task_store.LeaseTask(task.task_id)
        if not leased_task:
          continue
        data_consistent = self._EnsureCommandConsistency(
            task.request_id, task.command_id, task.task_id)
      except grpc.RpcError as e:
        # Datastore entities can only be written to once per second.  If we fail
        # to update the command or task, log the error, and try leasing other
        # tasks.
        logging.warning("Error leasing task %s: %s", task.task_id, e)
        continue
      if not data_consistent:
        continue

      matcher.RemoveDeviceGroups(matched_devices)

      logging.debug("lease task %s to run on %s",
                    str(leased_task.task_id),
                    tuple(m.device_serial for m in matched_devices))
      plugin_data_dict = copy.copy(leased_task.plugin_data)
      plugin_data_dict[_HOSTNAME_KEY] = host.hostname
      plugin_data_dict[_LAB_NAME_KEY] = host.lab_name or common.UNKNOWN_LAB_NAME
      plugin_data_dict[_HOST_GROUP_KEY] = host.host_group
      if leased_task.schedule_timestamp:
        plugin_data_dict[_TFC_COMMAND_ATTEMPT_QUEUE_START_TIMESTAMP_KEY] = (
            common.DatetimeToAntsTimestampProperty(
                leased_task.schedule_timestamp))
      plugin_data_dict[_TFC_COMMAND_ATTEMPT_QUEUE_END_TIMESTAMP_KEY] = (
          common.DatetimeToAntsTimestampProperty(leased_task.lease_timestamp or
                                                 common.Now()))

      plugin_data_ = api_messages.MapToKeyValuePairMessages(plugin_data_dict)
      leased_tasks.append(
          CommandTask(
              request_id=leased_task.request_id,
              command_id=leased_task.command_id,
              task_id=leased_task.task_id,
              command_line=leased_task.command_line,
              request_type=leased_task.request_type,
              device_serials=[match.device_serial for match in matched_devices],
              shard_count=leased_task.shard_count,
              shard_index=leased_task.shard_index,
              run_index=leased_task.run_index,
              attempt_index=leased_task.attempt_index,
              plugin_data=plugin_data_))
      for run_target in leased_task.run_targets:
        metric.RecordCommandTimingMetric(
            cluster_id=cluster,
            run_target=run_target,
            create_timestamp=command.create_time,
            command_action=metric.CommandAction.LEASE,
            count=True)
      if matcher.IsEmpty():
        break
      if num_tasks is not None and len(leased_tasks) >= num_tasks:
        break
    return leased_tasks
Пример #22
0
def CreateRequestEventMessage(request):
  """Creates a notifier message containing request state change information.

  Args:
    request: a request entity
  Returns:
    a message used for notifying request state changes
  """
  request_id = request.key.id()
  commands = GetCommands(request_id)
  attempts = GetCommandAttempts(request_id)

  summaries = []
  errors = []
  total_test_count = 0
  failed_test_count = 0
  passed_test_count = 0
  failed_test_run_count = 0
  device_lost_detected = 0
  result_links = set()
  total_run_time_sec = 0
  error_type = None
  error_reason = None

  for command in commands:
    run_count = command.run_count
    for attempt in reversed(attempts):
      if attempt.key.parent() != command.key:
        continue

      attempt = attempt.key.get(use_cache=False, use_memcache=False)
      if attempt.start_time and attempt.end_time:
        run_time = (attempt.end_time - attempt.start_time).total_seconds()
        if run_time > 0:
          total_run_time_sec += run_time

      summary = attempt.summary or "No summary available."
      result_match = re.match(SPONGE_URI_PATTERN, summary)
      result_link = result_match.group(1) if result_match else ""

      if attempt.device_lost_detected:
        # Count devices lost regardless of the state of the attempt
        device_lost_detected += attempt.device_lost_detected

      if run_count > 0 and attempt.state == common.CommandState.COMPLETED:
        if attempt.total_test_count:
          total_test_count += attempt.total_test_count
        if attempt.failed_test_count:
          failed_test_count += attempt.failed_test_count
        if attempt.passed_test_count:
          passed_test_count += attempt.passed_test_count
        if attempt.failed_test_run_count:
          failed_test_run_count += attempt.failed_test_run_count
        summaries.append("Attempt %s: %s" % (attempt.attempt_id, summary))
        if result_link:
          result_links.add(result_link)
        run_count -= 1

      # Only surface failed attempts if the command is in an ERROR state.
      elif (attempt.state in (common.CommandState.ERROR,
                              common.CommandState.FATAL)
            and attempt.state == command.state):
        error = attempt.error or "No error message available."
        # Set request's error_type and reason as first non-empty error's
        # mapping value
        if error_type is None and attempt.error:
          error_reason, error_type = (
              command_error_type_config.GetConfig(attempt.error))
        errors.append("Attempt %s: %s %s (ERROR)" %
                      (attempt.attempt_id, result_link, error))
        if result_link:
          result_links.add(result_link)
      elif (attempt.state == common.CommandState.CANCELED and
            attempt.state == command.state and attempt.error):
        # TF currently populates the error field on CANCELED attempts when
        # allocation fails. The error contains the whole command so it can be
        # very verbose, so we are surfacing only one of the attempt errors to
        # reduce noise.
        errors = [
            "Attempt %s: %s (CANCELED)" % (attempt.attempt_id, attempt.error)
        ]

  summary = "\n".join(summaries + errors)
  logging.debug(
      "notifier notify request %s change to state %s with reason %s.",
      request.key.id(), request.state, request.cancel_reason or error_reason)
  message = api_messages.RequestEventMessage(
      type=common.ObjectEventType.REQUEST_STATE_CHANGED,
      request_id=str(request.key.id()),
      new_state=request.state,
      request=datastore_entities.ToMessage(request),
      summary=summary,
      total_test_count=total_test_count,
      failed_test_count=failed_test_count,
      passed_test_count=passed_test_count,
      result_links=list(result_links),
      total_run_time_sec=int(total_run_time_sec),
      error_reason=error_reason,
      error_type=error_type,
      event_time=common.Now(),
      failed_test_run_count=failed_test_run_count,
      device_lost_detected=device_lost_detected)
  return message
Пример #23
0
def SyncCommand(request_id, command_id, add_to_sync_queue=True):
    """Sync the command.

  Cancel the command if inactive, otherwise re-add to the sync queue to check
  back again later.

  Args:
    request_id: Request ID for the command to sync
    command_id: Command ID for the command to sync
    add_to_sync_queue: Flag to determine if this command should be added to the
      sync queue
  """
    command = command_manager.GetCommand(request_id, command_id)

    if not command:
        logging.warning('No command found to sync. Request %s Command %s',
                        request_id, command_id)
        return

    if command.state in common.FINAL_COMMAND_STATES:
        # No need to sync commands in final states
        logging.info('Command reached final state %s. Request %s Command %s',
                     command.state, request_id, command_id)
        return

    if command.state != common.CommandState.QUEUED:
        # Only consider queued commands for inactivity checks.
        if add_to_sync_queue:
            AddToSyncQueue(command)
        return

    last_active_time = command_manager.GetLastCommandActiveTime(command)
    inactive_time = common.Now() - last_active_time
    if datetime.timedelta(minutes=MAX_COMMAND_EVENT_DELAY_MIN) < inactive_time:
        # Ensure command is leasable. If a worker leases a command task but does
        # not report back within MAX_COMMAND_EVENT_DELAY, we need to make it
        # leasable again.
        logging.debug(
            '%r has no events for %r minutes (> %r); ensuring leasable.',
            command.key, inactive_time, MAX_COMMAND_EVENT_DELAY_MIN)
        try:
            command_manager.EnsureLeasable(command)
        except command_manager.CommandTaskNotFoundError:
            logging.exception('command %s %s is not leasable; cancelling...',
                              request_id, command_id)
            _HandleCommandNotExecutable(request_id, command_id)
            return

    timeout_seconds = GetCommandQueueTimeoutSeconds(command)
    if inactive_time > datetime.timedelta(seconds=timeout_seconds):
        # Command timed out.
        logging.info((
            'Canceling command %s: last active %s, inactive time %s is greater '
            'than %d seconds'), command.key, last_active_time, inactive_time,
                     timeout_seconds)
        _HandleInactiveCommand(command, request_id, command_id)
        return

    # Add to the sync queue to check again later.
    if add_to_sync_queue:
        AddToSyncQueue(command)