Пример #1
0
def KillProcess(pid, signal_=signal.SIGTERM, timeout=30, waitpid=False):
    """Terminate the process identified by C{pid}.

    The requested signal is sent first. When C{timeout} is positive, the
    process is then polled until it is gone or the timeout expires; if it is
    still alive afterwards, SIGKILL is sent as well.

    @type pid: int
    @param pid: PID of the process to terminate; must be positive
    @type signal_: int
    @param signal_: signal to send first, SIGTERM by default
    @type timeout: int
    @param timeout: how long to wait for the process to go away before
        sending SIGKILL; if not positive, neither the waiting nor the
        SIGKILL escalation takes place
    @type waitpid: boolean
    @param waitpid: if true, a non-blocking waitpid is attempted on the
        process whenever the kill helper returns a true value (meant for our
        own children, which would otherwise remain as zombies)
    @raise errors.ProgrammerError: if C{pid} is not positive

    """
    if pid <= 0:
        # kill with pid=0 == suicide
        raise errors.ProgrammerError("Invalid pid given '%s'" % pid)

    if not IsProcessAlive(pid):
        return

    def _SendSignal(signum):
        """Send C{signum} and, if requested, try to reap the process."""
        delivered = utils_wrapper.IgnoreProcessNotFound(os.kill, pid, signum)
        if not (delivered and waitpid):
            return
        try:
            os.waitpid(pid, os.WNOHANG)
        except OSError:
            # Best effort only: failing to reap the process is not an error
            pass

    def _PollUntilGone():
        """Return once the process is gone, otherwise request a retry."""
        if not IsProcessAlive(pid):
            return

        try:
            (reaped_pid, _status) = os.waitpid(pid, os.WNOHANG)
        except OSError:
            raise utils_retry.RetryAgain()

        if reaped_pid <= 0:
            # waitpid did not report the process as reaped
            raise utils_retry.RetryAgain()

    _SendSignal(signal_)

    if timeout <= 0:
        return

    try:
        # Wait up to $timeout seconds
        utils_retry.Retry(_PollUntilGone, (0.01, 1.5, 0.1), timeout)
    except utils_retry.RetryTimeout:
        pass

    # Kill process if it's still alive
    if IsProcessAlive(pid):
        _SendSignal(signal.SIGKILL)
Пример #2
0
def IsProcessAlive(pid):
    """Tell whether a process with the given pid exists on the system.

    The check is done by calling C{os.stat} on the path returned by
    C{_GetProcStatusPath}, retrying for a short while when the stat call
    fails with EINVAL.

    @note: zombie status is not handled, so zombie processes
        will be returned as alive
    @type pid: int
    @param pid: the process ID to check
    @rtype: boolean
    @return: True if the process exists; False if it does not or if C{pid}
        is not positive

    """
    def _StatOnce(path):
        """Stat C{path} once, mapping "not found" errors to False."""
        try:
            os.stat(path)
        except EnvironmentError as err:
            if err.errno in (errno.ENOENT, errno.ENOTDIR):
                return False
            if err.errno == errno.EINVAL:
                # Ask the retry loop for another attempt
                raise utils_retry.RetryAgain(err)
            raise
        else:
            return True

    assert isinstance(pid, int), "pid must be an integer"
    if pid <= 0:
        return False

    # /proc in a multiprocessor environment can have strange behaviors.
    # Retry the os.stat a few times until we get a good result.
    try:
        return utils_retry.Retry(_StatOnce, (0.01, 1.5, 0.1), 0.5,
                                 args=[_GetProcStatusPath(pid)])
    except utils_retry.RetryTimeout as timeout_err:
        timeout_err.RaiseInner()
Пример #3
0
    def _flock(self, flag, blocking, timeout, errmsg):
        """Run the lock operation through C{self._Lock}, optionally retrying.

        Without a timeout, C{self._Lock} is called exactly once. With a
        timeout, the call is retried until it succeeds or the timeout
        expires.

        @type flag: int
        @param flag: operation flag; must not contain LOCK_NB
        @type blocking: bool
        @param blocking: whether the operation should be done in blocking
            mode.
        @type timeout: None or float
        @param timeout: for how long the operation should be retried (implies
            non-blocking mode).
        @type errmsg: string
        @param errmsg: error message in case operation fails.
        @raise errors.LockError: if the retries time out

        """
        assert self.fd, "Lock was closed"
        assert timeout is None or timeout >= 0, \
          "If specified, timeout must be positive"
        assert not (flag & fcntl.LOCK_NB), "LOCK_NB must not be set"

        use_timeout = timeout is not None

        # When a timeout is used, LOCK_NB must always be set; it is also set
        # for explicitly non-blocking requests
        if use_timeout or not blocking:
            flag |= fcntl.LOCK_NB

        if not use_timeout:
            self._Lock(self.fd, flag, timeout)
            return

        lock_args = (self.fd, flag, timeout)
        try:
            retry.Retry(self._Lock, (0.1, 1.2, 1.0), timeout, args=lock_args)
        except retry.RetryTimeout:
            raise errors.LockError(errmsg)
Пример #4
0
def _WaitForProcess(child, timeout):
  """Poll C{_CheckIfAlive} for the child until it stops retrying or we time out.

  A negative timeout is treated as zero. Running into the timeout is not an
  error; the function just returns.

  @param child: passed as the only argument to C{_CheckIfAlive}
  @param timeout: upper bound passed to the retry loop

  """
  backoff = (1.0, 1.2, 5.0)
  try:
    utils_retry.Retry(_CheckIfAlive, backoff, max(0, timeout), args=[child])
  except utils_retry.RetryTimeout:
    # Timing out is an accepted outcome here
    pass
Пример #5
0
def _AssertInstanceMove(inst, move_type):
    """Wait until the job list shows a successful move job for the instance.

    The job list is filtered on a summary of the form
    C{"<move_type>(<instance name>)"} and polled through C{retry.Retry}
    (arguments 5.0 and 500.0) until its output contains C{success}.

    @param inst: instance object; only its C{name} attribute is used
    @param move_type: text expected in front of the instance name in the
        job summary

    """
    def _CheckMoveSucceeded():
        summary_filter = '"%s(%s)" in summary' % (move_type, inst.name)
        job_list_cmd = [
            "gnt-job", "list", "--output=status", "--no-headers", "--filter",
            summary_filter,
        ]
        statuses = stdout_of(job_list_cmd)
        if 'success' in statuses:
            return
        raise retry.RetryAgain()

    retry.Retry(_CheckMoveSucceeded, 5.0, 500.0)
Пример #6
0
def _AssertNodeDrained(node):
    """Wait until the node shows up in the list of drained nodes.

    The output of C{gnt-node list} filtered on C{drained} is polled through
    C{retry.Retry} (arguments 5.0 and 500.0) until it contains the node's
    primary name.

    @param node: node object; only its C{primary} attribute is used

    """
    def _CheckDrained():
        node_list_cmd = [
            "gnt-node", "list", "--output=name", "--no-headers", "--filter",
            "drained",
        ]
        drained_names = stdout_of(node_list_cmd)
        if node.primary in drained_names:
            return
        raise retry.RetryAgain()

    retry.Retry(_CheckDrained, 5.0, 500.0)
Пример #7
0
def _AssertRepairCommand():
    """Wait until the job list shows a successful REPAIR_COMMAND job.

    The output of C{gnt-job list}, filtered on jobs whose summary contains
    C{REPAIR_COMMAND}, is polled through C{retry.Retry} (arguments 5.0 and
    500.0) until it contains C{success}.

    """
    def _CheckRepairSucceeded():
        job_list_cmd = [
            "gnt-job", "list", "--output=status", "--no-headers", "--filter",
            '"REPAIR_COMMAND" in summary',
        ]
        statuses = stdout_of(job_list_cmd)
        if 'success' in statuses:
            return
        raise retry.RetryAgain()

    retry.Retry(_CheckRepairSucceeded, 5.0, 500.0)
Пример #8
0
def _AssertInstanceRunning(inst):
    """Wait until the instance list reports the instance as running.

    The output of C{gnt-instance list}, filtered on the instance name, is
    polled through C{retry.Retry} (arguments 5.0 and 500.0) until it
    contains C{running}.

    @param inst: instance object; only its C{name} attribute is used

    """
    def _CheckRunning():
        name_filter = "name == \"%s\"" % inst.name
        instance_list_cmd = [
            "gnt-instance", "list", "--output=status", "--no-headers",
            "--filter",
            name_filter,
        ]
        status_output = stdout_of(instance_list_cmd)
        if "running" in status_output:
            return
        raise retry.RetryAgain()

    retry.Retry(_CheckRunning, 5.0, 500.0)
Пример #9
0
def _AssertRepairTagAddition(node):
    """Wait until exactly one maintenance tag is present and return it.

    C{_GetMaintTags} is polled through C{retry.Retry} (arguments 5.0 and
    500.0) for as long as it returns no tags.

    @param node: passed to C{_GetMaintTags}
    @return: the single tag found
    @raise qa_error.Error: if more than one tag is found

    """
    def _FetchSingleTag():
        tags = _GetMaintTags(node)
        tag_count = len(tags)
        if tag_count == 0:
            # Nothing there yet, poll again
            raise retry.RetryAgain()
        if tag_count > 1:
            raise qa_error.Error("Only one tag should be added")
        return tags[0]

    return retry.Retry(_FetchSingleTag, 5.0, 500.0)
Пример #10
0
def TestJobCancellation():
    """gnt-job cancel"""
    # The delay used for the first command should be large enough for the next
    # command and the cancellation command to complete before the first job is
    # done. The second delay should be small enough that not too much time is
    # spend waiting in the case of a failed cancel and a running command.
    FIRST_COMMAND_DELAY = 10.0
    AssertCommand(["gnt-debug", "delay", "--submit", str(FIRST_COMMAND_DELAY)])

    SECOND_COMMAND_DELAY = 3.0
    master = qa_config.GetMasterNode()

    # Forcing tty usage does not work on buildbot, so force all output of this
    # command to be redirected to stdout
    second_delay_cmd = "gnt-debug delay --submit %s 2>&1" % SECOND_COMMAND_DELAY
    job_id_output = GetCommandOutput(master.primary, second_delay_cmd)

    # Exactly one job ID is expected in the output of the submission
    found_job_ids = re.findall("JobID: ([0-9]+)", job_id_output)
    if len(found_job_ids) != 1:
        raise qa_error.Error(
            "Cannot parse gnt-debug delay output to find job id")
    (job_id, ) = found_job_ids

    AssertCommand(["gnt-job", "cancel", job_id])

    # Now wait until the second job finishes, and expect the watch to fail due to
    # job cancellation
    AssertCommand(["gnt-job", "watch", job_id], fail=True)

    # Then check for job cancellation
    job_status = _GetJobStatus(job_id)
    if job_status == constants.JOB_STATUS_CANCELED:
        return

    # Try and see if the job is being cancelled, and wait until the status
    # changes or we hit a timeout
    if job_status == constants.JOB_STATUS_CANCELING:
        fetch_status = functools.partial(_RetryingFetchJobStatus,
                                         constants.JOB_STATUS_CANCELING,
                                         job_id)
        # The multiplier to use is arbitrary, setting it higher could prevent
        # flakiness
        WAIT_MULTIPLIER = 4.0
        try:
            job_status = retry.Retry(fetch_status, 2.0,
                                     WAIT_MULTIPLIER * FIRST_COMMAND_DELAY)
        except retry.RetryTimeout:
            # The job status remains the same
            pass

    if job_status != constants.JOB_STATUS_CANCELED:
        raise qa_error.Error("Job was not successfully cancelled, status "
                             "found: %s" % job_status)
Пример #11
0
def AssertStatusRetry(jid, status, interval=1.0, timeout=20.0):
    """Poll the given job until it reaches the wanted status.

    Each poll calls C{qa_job_utils.RetryingUntilJobStatus} with the wanted
    status and the job ID converted to a string.

    @type jid: int
    @param jid: job ID of the job to poll
    @type status: string
    @param status: status to wait for
    @type interval: float
    @param interval: polling interval in seconds
    @type timeout: float
    @param timeout: polling timeout in seconds

    @raise retry.RetryTimeout: If the status was not reached within the
        timeout
    """
    def _PollJobStatus():
        return qa_job_utils.RetryingUntilJobStatus(status, str(jid))

    retry.Retry(_PollJobStatus, interval, timeout)
Пример #12
0
def CheckSsconfInstanceList(instance):
    """Wait for an instance to appear in the ssconf instance list.

    Because ssconf is updated in an asynchronous manner, the ssconf instance
    list is re-read through C{retry.Retry} (arguments 1 and 5) until it
    contains the resolved instance name.

    @type instance: string
    @param instance: Instance name; resolved with
        C{qa_utils.ResolveInstanceName} before the comparison

    """
    instance_name = qa_utils.ResolveInstanceName(instance)

    def _RequireInstanceListed():
        listed_instances = _ReadSsconfInstanceList()
        if instance_name in listed_instances:
            return
        raise retry.RetryAgain()

    retry.Retry(_RequireInstanceListed, 1, 5)
Пример #13
0
      return True
    except EnvironmentError, err:
      if err.errno in (errno.ENOENT, errno.ENOTDIR):
        return False
      elif err.errno == errno.EINVAL:
        raise utils_retry.RetryAgain(err)
      raise

  assert isinstance(pid, int), "pid must be an integer"
  if pid <= 0:
    return False

  # /proc in a multiprocessor environment can have strange behaviors.
  # Retry the os.stat a few times until we get a good result.
  try:
    return utils_retry.Retry(_TryStat, (0.01, 1.5, 0.1), 0.5,
                             args=[_GetProcStatusPath(pid)])
  except utils_retry.RetryTimeout, err:
    err.RaiseInner()


def IsDaemonAlive(name):
  """Check whether the named daemon is alive.

  The PID is read from the daemon's PID file and handed to
  C{IsProcessAlive}.

  @type name: string
  @param name: daemon name

  @rtype: boolean
  @return: the result of C{IsProcessAlive} for the PID read from the
      daemon's PID file

  """
  pid_file = utils_io.DaemonPidFileName(name)
  daemon_pid = utils_io.ReadPidFile(pid_file)
  return IsProcessAlive(daemon_pid)