Example #1
def kill_service(services, machines):
  """Kill all processes associated with specified services on the specified
  machines. E.execute() sends the commands concurrently when there is more than
  one node.

  Args:
    services: list of services to kill. 'adminconsole' and 'adminrunner' are
              currently supported.
    machines: list of hostnames
  """
  # Map of services to the command that kills the service
  find_service_pid_cmd = {
      'adminconsole': ("ps -e -o pid,args --width 100 | "
                       "grep loop_AdminConsole.py | grep -v grep | "
                       "awk '{print $1}' ; "
                       "%s" % python_kill.GetServicesListeningOn(['8000'])),
      'adminrunner': ("ps -e -o pid,args --width 100 | "
                      "grep loop_AdminRunner.py | grep -v grep | "
                      "awk '{print $1}' ; "
                      "%s" % python_kill.GetServicesListeningOn(['2100'])),
  }

  for service in services:
    if service not in find_service_pid_cmd:
      logging.error('kill_service: Unrecognised service "%s"' % service)
    else:
      logging.info('kill_service: Killing service "%s" on %d nodes...' %
                   (service, len(machines)))
      kill_cmd = ('sh -c "(kill `%s`; sleep 3; kill -9 `%s`; true)" '
                  '> /dev/null 2>&1' %
                  (find_service_pid_cmd[service],
                   find_service_pid_cmd[service]))
      E.execute(machines, kill_cmd, [], alarm=1, verbose=0)
      logging.info('kill_service: Done killing service "%s"' % service)
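To make the calling convention concrete, a hypothetical invocation could look like the sketch below; the service names come from the docstring above, while the hostnames are placeholders rather than values from the original listing.

# Hypothetical usage sketch: kill both supported services on two nodes.
kill_service(['adminconsole', 'adminrunner'], ['ent1', 'ent2'])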
Example #2
def SyncLogsWithCanonical(ver, canonical):
  """ sync logs with the canonical

  Arguments:
    ver: '4.6.5'
    canonical: 'ent1'
  """

  gfs_master_nodes, _ = core_utils.GFSMasterNodes()
  gfs_master_nodes.remove(canonical)
  gfs_master_dir = '/export/hda3/%s/data/gfs_master' % ver
  log_dir = '%s/ent.gfsmaster' % gfs_master_dir
  backup_log_dir = '%s.backup.%d' % (log_dir, int(time.time()))
  vars = {'gfs_master_dir':     gfs_master_dir,
          'log_dir':            log_dir,
          'backup_log_dir':     backup_log_dir,
          'canonical':          canonical
         }
  cmd = ('rm -rf %(log_dir)s.backup*; '
         'mv %(log_dir)s %(backup_log_dir)s; '
         'mkdir -p %(log_dir)s; chown nobody:nobody %(log_dir)s; '
         'rsync -c -e ssh -aHpogt %(canonical)s:%(log_dir)s %(gfs_master_dir)s'
         % vars
        )
  out = []
  enthome = '/export/hda3/%s' % ver
  E.execute(gfs_master_nodes, cmd, out, 1200, 1, 0, enthome)
Example #3
def SyncLogsWithCanonical(ver, canonical):
    """ sync logs with the canonical

  Arguments:
    ver: '4.6.5'
    canonical: 'ent1'
  """

    gfs_master_nodes, _ = core_utils.GFSMasterNodes()
    gfs_master_nodes.remove(canonical)
    gfs_master_dir = '/export/hda3/%s/data/gfs_master' % ver
    log_dir = '%s/ent.gfsmaster' % gfs_master_dir
    backup_log_dir = '%s.backup.%d' % (log_dir, int(time.time()))
    vars = {
        'gfs_master_dir': gfs_master_dir,
        'log_dir': log_dir,
        'backup_log_dir': backup_log_dir,
        'canonical': canonical
    }
    cmd = (
        'rm -rf %(log_dir)s.backup*; '
        'mv %(log_dir)s %(backup_log_dir)s; '
        'mkdir -p %(log_dir)s; chown nobody:nobody %(log_dir)s; '
        'rsync -c -e ssh -aHpogt %(canonical)s:%(log_dir)s %(gfs_master_dir)s'
        % vars)
    out = []
    enthome = '/export/hda3/%s' % ver
    E.execute(gfs_master_nodes, cmd, out, 1200, 1, 0, enthome)
Example #4
    def installkey(self):
        """ installs the staging key as the currently installed private key
    returns:
    0 on success,
    1 on empty install key (not an error)
    2 when the private key is invalid
    3 when the private key could not be distributed
    """

        self.updatelock.acquire()
        try:

            # first verify if the staging key is empty (not an error)
            if (
                not os.path.exists(ssl_cert.STAGINGKEY_FILENAME % self.cfg.getGlobalParam("ENTERPRISE_HOME"))
            ) or 0 == len(open(ssl_cert.STAGINGKEY_FILENAME % self.cfg.getGlobalParam("ENTERPRISE_HOME"), "r").read()):
                return "1"

            # next verify that the staging key is a valid file
            verifycmd = "secure_script_wrapper -p2 %s verifystagingkey %s" % (
                self.sslWrapperPath,
                self.cfg.getGlobalParam("ENTERPRISE_HOME"),
            )
            outputList = []
            verifycode = E.execute(["localhost"], verifycmd, outputList, 60)

            if verifycode != 0:
                E.rm(["localhost"], ssl_cert.STAGINGKEY_FILENAME % self.cfg.getGlobalParam("ENTERPRISE_HOME"))
                logging.error("Verify failed for key [%s]; error code: %d" % (str(outputList), verifycode))
                return "2"

            # distribute the staging key
            retcode = E.distribute(
                self.cfg.getGlobalParam("MACHINES"),
                ssl_cert.STAGINGKEY_FILENAME % self.cfg.getGlobalParam("ENTERPRISE_HOME"),
                60,
            )
            if retcode != 0:
                logging.error("Couldn't distribute private key, error %d" % retcode)
                return "3"

            # next, copy the key on all machines
            cmd = "secure_script_wrapper -p2 %s installkey %s" % (
                self.sslWrapperPath,
                self.cfg.getGlobalParam("ENTERPRISE_HOME"),
            )

            outputList = []
            retcode = E.execute(self.cfg.getGlobalParam("MACHINES"), cmd, outputList, 60)

            if retcode != 0:
                logging.error("Couldn't install cert: %s" % str(outputList))
                return "3"

            self.writeAdminRunnerOpMsg(M.MSG_LOG_SSL_KEY_INSTALLED)
        finally:
            self.updatelock.release()

        return "0"
Example #5
  def installkey(self):
    """ installs the staging key as the currently installed private key
    returns:
    0 on success,
    1 on empty install key (not an error)
    2 when the private key is invalid
    3 when the private key could not be distributed
    """

    self.updatelock.acquire()
    try:

      # first verify if the staging key is empty (not an error)
      if (not os.path.exists(ssl_cert.STAGINGKEY_FILENAME % \
                             self.cfg.getGlobalParam("ENTERPRISE_HOME"))) or \
         0 == len(open(ssl_cert.STAGINGKEY_FILENAME % \
                       self.cfg.getGlobalParam("ENTERPRISE_HOME"), "r").read()):
        return "1"

      # next verify that the staging key is a valid file
      verifycmd = "secure_script_wrapper -p2 %s verifystagingkey %s" % (
        self.sslWrapperPath, self.cfg.getGlobalParam("ENTERPRISE_HOME") )
      outputList = []
      verifycode = E.execute(['localhost'], verifycmd, outputList, 60)

      if verifycode != 0:
        E.rm(['localhost'],
             ssl_cert.STAGINGKEY_FILENAME %
             self.cfg.getGlobalParam("ENTERPRISE_HOME"))
        logging.error("Verify failed for key [%s]; error code: %d" %
                      (str(outputList), verifycode) )
        return "2"

      # distribute the staging key
      retcode = E.distribute(self.cfg.getGlobalParam("MACHINES"),
                             ssl_cert.STAGINGKEY_FILENAME %
                             self.cfg.getGlobalParam("ENTERPRISE_HOME"), 60)
      if retcode != 0:
        logging.error("Couldn't distribute private key, error %d" % retcode)
        return "3"

      # next, copy the key on all machines
      cmd = "secure_script_wrapper -p2 %s installkey %s" % (
        self.sslWrapperPath, self.cfg.getGlobalParam("ENTERPRISE_HOME"))

      outputList = []
      retcode = E.execute(self.cfg.getGlobalParam("MACHINES"), cmd, outputList,
                          60)

      if retcode != 0:
        logging.error("Couldn't install cert: %s" % str(outputList))
        return "3"

      self.writeAdminRunnerOpMsg(M.MSG_LOG_SSL_KEY_INSTALLED)
    finally:
      self.updatelock.release()

    return "0"
Example #6
def HaltMachines(enthome, machines):
    'Stops and powers down machines.'

    logging.info("Halting machines: %s" % string.join(machines, ","))
    E.execute(machines, '/sbin/shutdown -h now &', None, 1)
    time.sleep(60)
    for machine in machines:
        SendAPCCommand(enthome, machine, APC_OFF)
    return 0
Example #7
def HaltMachines(enthome, machines):
  'Stops and powers down machines.'

  logging.info("Halting machines: %s" % string.join(machines, ","))
  E.execute(machines, '/sbin/shutdown -h now &', None, 1)
  time.sleep(60)
  for machine in machines:
    SendAPCCommand(enthome, machine, APC_OFF)
  return 0
Example #8
def RebootMachine(enthome, machine):
    'Reboots a machine.'
    if machine == E.getCrtHostName():
        # Rebooting ourself
        logging.info('Rebooting %s' % machine)
        E.execute([E.LOCALHOST], '/sbin/shutdown -r now', None, 1)
        # If we're still alive after a minute, the APC will kick in
    else:
        # Try shutting down cleanly first
        logging.info('Shutting down %s' % machine)
        E.execute([machine], '/sbin/shutdown -h now &', None, 1)
    time.sleep(60)
    logging.info('Rebooting %s via APC' % machine)
    return SendAPCCommand(enthome, machine, APC_REBOOT)
Example #9
    def installcert(self):
        """ installs the staging certificate as the currently installed certificate
    returns:
    0 on success, and
    1 on failure
    """

        self.updatelock.acquire()
        try:

            # first verify that the staging certificate is a valid file
            verifycmd = "secure_script_wrapper -p2 %s verifystagingcert %s" % (
                self.sslWrapperPath,
                self.cfg.getGlobalParam("ENTERPRISE_HOME"),
            )
            outputList = []
            verifycode = E.execute(["localhost"], verifycmd, outputList, 60)

            if verifycode != 0:
                E.rm(["localhost"], ssl_cert.STAGINGCERT_FILENAME % self.cfg.getGlobalParam("ENTERPRISE_HOME"))
                logging.error("Verify failed for certificate [%s]; error code: %d" % (str(outputList), verifycode))
                return "1"

            # distribute the staging certificate
            retcode = E.distribute(
                self.cfg.getGlobalParam("MACHINES"),
                ssl_cert.STAGINGCERT_FILENAME % self.cfg.getGlobalParam("ENTERPRISE_HOME"),
                60,
            )
            if retcode != 0:
                logging.error("Couldn't distribute apache cert, error %d" % retcode)

            # next, generate the certificate on all machines
            cmd = "secure_script_wrapper -p2 %s installcert %s" % (
                self.sslWrapperPath,
                self.cfg.getGlobalParam("ENTERPRISE_HOME"),
            )

            outputList = []
            retcode = E.execute(self.cfg.getGlobalParam("MACHINES"), cmd, outputList, 60)

            if retcode != 0:
                logging.error("Couldn't install cert: %s" % str(outputList))
                return "1"

            self.writeAdminRunnerOpMsg(M.MSG_LOG_SSL_CERT_INSTALLED)
        finally:
            self.updatelock.release()

        return "0"
Example #10
def RebootMachine(enthome, machine):
  'Reboots a machine.'
  if machine == E.getCrtHostName():
    # Rebooting ourself
    logging.info('Rebooting %s' % machine)
    E.execute([E.LOCALHOST], '/sbin/shutdown -r now', None, 1)
    # If we're still alive after a minute, the APC will kick in
  else:
    # Try shutting down cleanly first
    logging.info('Shutting down %s' % machine)
    E.execute([machine], '/sbin/shutdown -h now &', None, 1)
  time.sleep(60)
  logging.info('Rebooting %s via APC' % machine)
  return SendAPCCommand(enthome, machine, APC_REBOOT)
Example #11
def check_klogd_syslogd_conf(machines, enthome, unittestdir=None):
  """ Babysit the klogd.conf and syslogd.conf files.

  Recreate klogd.conf and syslogd.conf if they are not in the dir.
  Args:
    machines: ['ent1', 'ent2', 'ent3', 'ent4', 'ent5']
    enthome: '/export/hda3/4.6.0.G.27/'
    unittestdir: '/tmp/etc/localbabysitter.d/' -- used for unittest only
  """
  KLOGD_CONF_DATA = (
    "klogd = {\n"
    "  restart_command : '/sbin/service syslog restart',\n"
    "  timeout : 30,\n"
    "  interval : 30,\n"
    "  use_service_wrapper : 0,\n"
    "  pidfile : '/var/run/klogd.pid',\n"
    "}\n"
  )

  SYSLOGD_CONF_DATA = (
    "syslogd = {\n"
    "  restart_command : '/sbin/service syslog restart',\n"
    "  timeout : 30,\n"
    "  interval : 30,\n"
    "  use_service_wrapper : 0,\n"
    "  pidfile : '/var/run/syslogd.pid',\n"
    "}\n"
  )

  CHECK_CREATE_CMD = (
    'if [ ! -e "%(file)s" ]\n'
    'then\n'
    '  echo "%(data)s" > %(file)s\n'
    '  chmod 644 %(file)s\n'
    'fi\n'
  )

  if unittestdir is None:
    dir = '/etc/localbabysitter.d/'
  else:
    dir = unittestdir
  file_info = {'klogd.conf':KLOGD_CONF_DATA, 'syslogd.conf':SYSLOGD_CONF_DATA }
  for fname, data in file_info.items():
    file = os.path.join(dir, fname)
    cmd = CHECK_CREATE_CMD % {'data': data, 'file': file}
    if unittestdir is None:
      E.execute(machines, cmd, [], 0, 0, 0, enthome)
    else:
      os.system(cmd)
Example #12
  def installcert(self):
    """ installs the staging certificate as the currently installed certificate
    returns:
    0 on success, and
    1 on failure
    """

    self.updatelock.acquire()
    try:

      # first verify that the staging certificate is a valid file
      verifycmd = "secure_script_wrapper -p2 %s verifystagingcert %s" % (
        self.sslWrapperPath, self.cfg.getGlobalParam("ENTERPRISE_HOME") )
      outputList = []
      verifycode = E.execute(['localhost'], verifycmd, outputList, 60)

      if verifycode != 0:
        E.rm(['localhost'],
             ssl_cert.STAGINGCERT_FILENAME %
             self.cfg.getGlobalParam("ENTERPRISE_HOME"))
        logging.error("Verify failed for certificate [%s]; error code: %d" %
                      (str(outputList), verifycode) )
        return "1"

      # distribute the staging certificate
      retcode = E.distribute(self.cfg.getGlobalParam("MACHINES"),
                             ssl_cert.STAGINGCERT_FILENAME %
                             self.cfg.getGlobalParam("ENTERPRISE_HOME"), 60)
      if retcode != 0:
        logging.error("Couldn't distribute apache cert, error %d" % retcode)

      # next, generate the certificate on all machines
      cmd = "secure_script_wrapper -p2 %s installcert %s" % (
        self.sslWrapperPath, self.cfg.getGlobalParam("ENTERPRISE_HOME"))

      outputList = []
      retcode = E.execute(self.cfg.getGlobalParam("MACHINES"), cmd, outputList,
                          60)

      if retcode != 0:
        logging.error("Couldn't install cert: %s" % str(outputList))
        return "1"

      self.writeAdminRunnerOpMsg(M.MSG_LOG_SSL_CERT_INSTALLED)
    finally:
      self.updatelock.release()

    return "0"
Example #13
    def setcert(self, certBody):
        """ Takes a cert file body as the input, and saves it
    as the staging certificate
    returns 0 on success, or 1 on failure
    """

        retval = 0
        self.updatelock.acquire()
        try:
            try:
                open(ssl_cert.STAGINGCERT_FILENAME % self.cfg.getGlobalParam("ENTERPRISE_HOME"), "w").write(certBody)
            except IOError:
                retval = 1
                logging.error(
                    "Couldn't save certificate to [%s]"
                    % (ssl_cert.STAGINGCERT_FILENAME % self.cfg.getGlobalParam("ENTERPRISE_HOME"))
                )

            if retval == 0:
                verifycmd = "secure_script_wrapper -p2 %s verifystagingcert %s" % (
                    self.sslWrapperPath,
                    self.cfg.getGlobalParam("ENTERPRISE_HOME"),
                )
                outputList = []
                verifycode = E.execute(["localhost"], verifycmd, outputList, 60)

                if verifycode != 0:
                    retval = 1
                    E.rm(["localhost"], ssl_cert.STAGINGCERT_FILENAME % self.cfg.getGlobalParam("ENTERPRISE_HOME"))
                    logging.error("Couldn't verify certificate [%s]; error code: %d" % (str(outputList), verifycode))

        finally:
            self.updatelock.release()

        return "%d" % retval
Example #14
def core_op_out(ver, home, op, nodes, ignore=0, testver=None, gfs=1):
  """Executes ent_core command and returns the result.

  ent_core is running at the same time on all the nodes in different threads.

  Arguments:
    ver: '4.6.5'
    home: '/export/hda3/4.6.5'
    op: 'info'
    nodes: ['ent1', 'ent2']
    ignore: 1 - ignore errors; 0 - otherwise.
    testver: 1 - if this is a test version; 0 - otherwise.
    gfs:     1 - start gfs during activation; 0 - otherwise.

  Returns:
    error code and output from all nodes.
    (0,
     'state=RUNNING\nnodes=5\nfailures=1\nstate=RUNNING\nnodes=5\nfailures=1')
  """

  if testver == None:
    testver = is_test(ver)
  out = []
  core_base = ('/export/hda3/%s/local/google/bin/secure_script_wrapper '
               '-e /export/hda3/%s/bin/ent_core --ver=%s' % (ver, ver, ver))
  if testver:
    core_base = '%s --testver' % core_base
  if gfs == 0:
    core_base = '%s --gfs=0' % core_base
  cmd = '%s --%s'  % (core_base, op)
  ret = E.execute(nodes, cmd, out, 0, 0, 0, home, ignore=ignore)
  if ret:
    logging.error(''.join(out))
  return ret, ''.join(out)
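A minimal usage sketch for core_op_out, assuming the enclosing module's names (E, logging) are importable; the version, home and node values simply mirror the docstring examples.

# Usage sketch (illustrative values only).
ret, out = core_op_out('4.6.5', '/export/hda3/4.6.5', 'info', ['ent1', 'ent2'])
if ret:
  logging.error('ent_core info failed: %s' % out)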
Example #15
def _RunServeCmd(cfg, version, cmd, allnodes=0):
    """Run serve_service command.
  cmd: 'stop', 'start', 'activate', 'deactivate'
  allnodes: 1 to run command on all nodes
  """
    serve_service_cmd = (
        '/export/hda3/%s/local/google3/enterprise/legacy/scripts/'
        'serve_service.py %s %s' %
        (version, cfg.getGlobalParam('ENTERPRISE_HOME'), cmd))
    logging.info('Running: %s' % serve_service_cmd)
    if allnodes:
        machines = cfg.getGlobalParam(C.MACHINES)
    else:
        machines = [E.getCrtHostName()]

    if E.execute(machines,
                     SECURE_WRAPPER_COMMAND % ( \
                          cfg.getGlobalParam('ENTERPRISE_HOME'),
                          '-p2',
                          serve_service_cmd),
                     None, 0) != E.ERR_OK:
        logging.error('%s: failed' % serve_service_cmd)
        return 1
    logging.info('%s: completed' % serve_service_cmd)
    return 0
Example #16
def renice_svs(machine, new_priority=SVS_NICE_VALUE):
    """Renices svs on 'machine' with new_priority.

  Returns 0 on failure, 1 on success.
  """
    pid = None
    ret = 0
    # Get pid of svs.
    try:
        pid = open(SVS_PID_FILE, 'r').read()
    except:
        err = str(sys.exc_info()[:2])
        logging.error('Error reading SVS pid from %s: %s' %
                      (SVS_PID_FILE, err))

    if pid is not None:
        # Get nice value for pid
        old_priority = os_utils.GetAttr('nice', int(pid))
        if old_priority is not None and int(old_priority) != new_priority:
            logging.info('Renicing SVS to %d.' % new_priority)
            # Get group id from pid.
            pgid = os_utils.GetAttr('pgid', int(pid))
            # Renice the whole group.
            cmd = 'renice %d -g %d' % (new_priority, int(pgid))
            rc = E.execute([machine], cmd, None, 1)
            if rc == 0:
                ret = 1
            else:
                logging.error('Error renicing SVS: %d' % ret)
    return ret
Example #17
 def replicateConfig(self, machines):
   """ Replicates the config to a list of machines """
   cmd = (". %s && cd %s/local/google3/enterprise/legacy/scripts && "
          "./replicate_config.py %s %s" % (
     self.getGlobalParam("ENTERPRISE_BASHRC"), self.entHome,
     E.getCrtHostName(), self.globalParams.GetConfigFileName()))
   return E.execute(machines, cmd, None, true)
Example #18
def get_core_states(ver, home, nodes, ignore=0, testver=None):
  """ get the ENT_CORE_STATE from all nodes

  Running "ent_core --ver --info" on all the nodes sequentially

  Arguments:
    ver: '4.6.5'
    home: '/export/hda3/4.6.5'
    nodes: ['ent1', 'ent2']
    ignore: 1 - ignore errors; 0 - otherwise.
    testver: 1 - if this is a test version; 0 - otherwise.

  Returns:
    {'ent1': 'state=RUNNING\nnodes=5\nfailures=1',
     'ent2': 'state=RUNNING\nnodes=5\nfailures=1'}

  """

  if testver == None:
    testver = is_test(ver)
  states = {}
  core_base = ('/export/hda3/%s/local/google/bin/secure_script_wrapper '
               '-e /export/hda3/%s/bin/ent_core --ver=%s' % (ver, ver, ver))
  if testver:
    core_base = '%s --testver' % core_base
  cmd = '%s --info'  % core_base
  for node in nodes:
    out = []
    ret = E.execute([node], cmd, out, 0, 0, 0, home, ignore=ignore)
    if not ret:
      states[node] = ''.join(out)
  return states
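Building on the return format shown in the docstring, a hedged consumer sketch might scan the per-node output for nodes that are not RUNNING; the values below are illustrative.

states = get_core_states('4.6.5', '/export/hda3/4.6.5', ['ent1', 'ent2'])
for node, state in states.items():
  if 'state=RUNNING' not in state:
    logging.error('core not RUNNING on %s: %s' % (node, state))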
Example #19
 def replicateConfig(self, machines):
     """ Replicates the config to a list of machines """
     cmd = (". %s && cd %s/local/google3/enterprise/legacy/scripts && "
            "./replicate_config.py %s %s" %
            (self.getGlobalParam("ENTERPRISE_BASHRC"), self.entHome,
             E.getCrtHostName(), self.globalParams.GetConfigFileName()))
     return E.execute(machines, cmd, None, true)
Example #20
def renice_svs(machine, new_priority=SVS_NICE_VALUE):
  """Renices svs on 'machine' with new_priority.

  Returns 0 on failure, 1 on success.
  """
  pid = None
  ret = 0
  # Get pid of svs.
  try:
    pid = open(SVS_PID_FILE, 'r').read()
  except:
    err = str(sys.exc_info()[:2])
    logging.error('Error reading SVS pid from %s: %s' % (SVS_PID_FILE, err) )

  if pid is not None:
    # Get nice value for pid
    old_priority = os_utils.GetAttr('nice', int(pid))
    if old_priority is not None and int(old_priority) != new_priority:
      logging.info('Renicing SVS to %d.' % new_priority)
      # Get group id from pid.
      pgid = os_utils.GetAttr('pgid', int(pid))
      # Renice the whole group.
      cmd = 'renice %d -g %d' % (new_priority, int(pgid))
      rc = E.execute([machine], cmd, None, 1)
      if rc == 0:
        ret = 1
      else:
        logging.error('Error renicing SVS: %d' % ret)
  return ret
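Given the 0-on-failure / 1-on-success convention documented above, a caller sketch could look like this (the hostname is a placeholder).

if not renice_svs('ent1'):
  logging.error('Failed to renice SVS on ent1')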
Example #21
  def drain_urlmanagers(self):
    """
    We need to do this before advancing the epoch -- we can do it
    multiple times
    """
    urlmanagers = self.cfg.globalParams.GetServerHostPorts("urlmanager")
    num_shards = self.cfg.globalParams.GetNumShards('urlmanager')
    epoch = self.cfg.getGlobalParam('RT_EPOCH')

    for (host, port) in urlmanagers:
      # We don't do it here directly because of the timeout
      cmd = ". %s; cd %s/local/google3/enterprise/legacy/util && "\
            "./port_talker.py %s %d 'd DumpingStatusTable' %d" % (
          self.cfg.getGlobalParam('ENTERPRISE_BASHRC'),
          self.cfg.entHome,
          host, port, 300) # 5 min timeout
      err = E.execute([E.getCrtHostName()], cmd, None, 0)
      if E.ERR_OK != err:
        logging.error("Error draining urlmanagers [%s]" % err)
        return 1

      # Make sure that the file is out
      shard_num = servertype.GetPortShard(port)
      file = "%surlmanager_out_table_%02d_of_%02d_epoch%010d" % (
        self.cfg.getGlobalParam('NAMESPACE_PREFIX'),
        shard_num, num_shards, epoch)
      err, out = E.run_fileutil_command(self.cfg.globalParams, "ls %s" % file)
      if E.ERR_OK != err:
        logging.error("The status table file [%s] is not there" % file)
        return 1

    return 0
Example #22
  def browse(self, clientName, virtualFile, fromLine, toLine,
             grepString, fileArgs):

    grepString = urllib.quote_plus(grepString.strip())
    fromLine = string.atoi(fromLine)
    toLine   = string.atoi(toLine)

    fileLocation = self.makePhysicalFile(virtualFile, clientName, grepString,
                                         fileArgs)
    if not fileLocation:
      raise ar_exception.ARException((
        ar_exception.LOADPARAMS, M.ERR_INVALIDFILESPECIFICATION))

    # Get valid start line and end line
    numLines = toLine - fromLine + 1
    if numLines < 0:  numLines = 0
    toLine = fromLine + numLines - 1

    # Get the lines
    result = []

    parsedGrepString = commands.mkarg(grepString)
    if ( E.ERR_OK != E.execute(
        [E.getCrtHostName()], "head -n %s %s | tail -n %s | grep -i -F -- %s" %
        (toLine, fileLocation, numLines, parsedGrepString), result, false) ):
      return "0"

    return "%s\n%s" % (len(result[0]), result[0])
Example #23
def SendAPCCommand(enthome, machine, cmd):
  'Sends commands to the APC.'
  return E.ERR_OK != E.execute([E.LOCALHOST],
                               EXEC_REBOOT % (enthome,
                                              enthome,
                                              machine,
                                              REBOOT_PARAM[cmd]), None, 1)
Example #24
def _RunServeCmd(cfg, version, cmd, allnodes=0):
  """Run serve_service command.
  cmd: 'stop', 'start', 'activate', 'deactivate'
  allnodes: 1 to run command on all nodes
  """
  serve_service_cmd = (
      '/export/hda3/%s/local/google3/enterprise/legacy/scripts/'
      'serve_service.py %s %s' % (version,
                                  cfg.getGlobalParam('ENTERPRISE_HOME'),
                                  cmd))
  logging.info('Running: %s' % serve_service_cmd)
  if allnodes:
    machines = cfg.getGlobalParam(C.MACHINES)
  else:
    machines = [E.getCrtHostName()]

  if E.execute(machines,
                   SECURE_WRAPPER_COMMAND % ( \
                        cfg.getGlobalParam('ENTERPRISE_HOME'),
                        '-p2',
                        serve_service_cmd),
                   None, 0) != E.ERR_OK:
    logging.error('%s: failed' % serve_service_cmd)
    return 1
  logging.info('%s: completed' % serve_service_cmd)
  return 0
Example #25
def _ExecuteCommand(cmd, machines=['localhost'], out=None,
                            timeout=15*60, num_tries=2, error_filter=None):
  """ Helper function to execute a command multiple times until
  it succeeds.

  Input: cmd: command to run
  timeout: seconds to allow command to run
  machines: machine list on which to execute
  out: list of output lines
  num_tries: number of times to retry command
  error_filter: A function that is called if the cmd execution failed.  It
    takes a list of output lines as input, can filter the errors, and decides
    if the execution counts as successful.  Returns 0 if execution succeeded,
    1 if the execution failed.
  Output: 0 for success, 1 for failure
  """
  if out == None:
    out = []
  for i in range(0, num_tries):
    if E.execute(machines, cmd, out, timeout) == 0:
      # Command was successful
      return 0
    # Execution failed
    if error_filter and error_filter(out) == 0:
      # Error filter says error is ok, execution counts as success
      logging.error('Cmd %s ignoring error: %s' % (cmd, ''.join(out)))
      return 0
    if i < num_tries-1:
      logging.error('Cmd %s error: %s, retrying.' % (cmd, ''.join(out)))
    else:
      logging.error('Cmd %s error: %s, failing.' % (cmd, ''.join(out)))
  return 1
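A sketch of how the error_filter hook might be wired up; the filter and the rm command below are invented for illustration and are not part of the original code.

def _ignore_missing_file(out_lines):
  # Treat the failure as a success if the only output is 'No such file' noise.
  for line in out_lines:
    if line.strip() and 'No such file' not in line:
      return 1
  return 0

rc = _ExecuteCommand('rm /export/hda3/tmp/stale.lock',
                     machines=['ent1', 'ent2'],
                     num_tries=3,
                     error_filter=_ignore_missing_file)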
Example #26
    def browse(self, clientName, virtualFile, fromLine, toLine, grepString,
               fileArgs):

        grepString = urllib.quote_plus(grepString.strip())
        fromLine = string.atoi(fromLine)
        toLine = string.atoi(toLine)

        fileLocation = self.makePhysicalFile(virtualFile, clientName,
                                             grepString, fileArgs)
        if not fileLocation:
            raise ar_exception.ARException(
                (ar_exception.LOADPARAMS, M.ERR_INVALIDFILESPECIFICATION))

        # Get valid start line and end line
        numLines = toLine - fromLine + 1
        if numLines < 0: numLines = 0
        toLine = fromLine + numLines - 1

        # Get the lines
        result = []

        parsedGrepString = commands.mkarg(grepString)
        if (E.ERR_OK !=
                E.execute([E.getCrtHostName()],
                          "head -n %s %s | tail -n %s | grep -i -F -- %s" %
                          (toLine, fileLocation, numLines, parsedGrepString),
                          result, false)):
            return "0"

        return "%s\n%s" % (len(result[0]), result[0])
Example #27
def InactivateCleanup(ver, home, active_nodes):
  """Clean up things that inactivate may have failed to handle.

  Removes /etc/google/ent4-X-X.chubby_cell and /etc/localbabysitter.d/*.
  Kills nobody processes containing \<4.x.x\>.
  """
  cmd = ('/bin/rm -f /etc/google/%s.chubby_cell '
         '/etc/localbabysitter.d/*-%s.conf' %
         (core_utils.GetCellName(ver), ver))
  # Ignore any errors
  E.execute(active_nodes, cmd, [], 0, 0, 0, home)

  cmd = "/usr/bin/pkill -KILL -u nobody -f '/%s/'" % ver.replace('.', '\\.')
  # Kill a few times just to make sure
  for _ in range(0, 3):
    # Ignore any errors
    E.execute(active_nodes, cmd, [], 0, 0, 0, home)
Example #28
def start_gfs(ver, home, nodes, ignore=0, testver=None, sync_log=1):
  """ Starts GFS.

    By default, gfs replica transaction log will be sync'ed before starting
    GFS. This operation should not take more than a few seconds. It uses rsync
    internally.

  Arguments:
    ver:       '4.6.5'
    home:      '/export/hda3/4.6.5'
    nodes:     ['ent1', 'ent2']
    ignore:    1 - ignore errors; 0 - otherwise.
    testver:   1 - if this is a test version; 0 - otherwise.
    sync_log:  1 - sync the gfs replica transaction log with the canonical;
               0 - otherwise

  Returns:
    1 - successful. 0 - otherwise.

  """

  if sync_log:
    # make sure the transaction logs are in sync before starting gfs
    start = time.time()
    logging.info("START GFS: Sync'ing GFS replica transaction logs")
    try:
      ent_home = "/export/hda3/%s" % ver
      vm_dir = "/export/hda3/versionmanager"
      bashrc_file = "%s/local/conf/ent_bashrc" % ent_home
      entcore_dir = "%s/google3/enterprise/core" % vm_dir
      handle_gfs_no_master_cmd = (". %s && cd %s && ./handle_gfs_no_master.py "
            "%s %s %s") % (bashrc_file, entcore_dir, ver, testver, "force_sync")
      output= []
      E.execute([E.LOCALHOST], handle_gfs_no_master_cmd, output, None,
                enthome=ent_home)
    except Exception, e:
      (t, v, tb) = sys.exc_info()
      exc_msg = string.join(traceback.format_exception(t, v, tb))
      # the "sync log" operation is not a necessary step for starting
      # gfs. So ignore any error and go ahead to start gfs.
      logging.info("The following errors are ignored: [%s]" % exc_msg)
    end = time.time()
    diff = end - start
    logging.info("START GFS: STAT: Sync'ing GFS replica transaction logs"
                 " took %s seconds" % diff)
Example #29
 def execCmd(self, cmd, machines, logMsg=None):
   ''' Takes a command string and returns its output'''
   outputList = []
   retcode  = E.execute(machines, cmd, outputList, 60)
   result = str(outputList)
   if retcode != 0:
     return '1'
   if logMsg:
     self.writeAdminRunnerOpMsg(logMsg)
   return '0\n%d\n%s' % (len(result), result)
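Because the reply is packed as '0\n<length>\n<output>', a caller has to split it apart; a hedged sketch follows, where handler is a hypothetical instance of the enclosing class.

reply = handler.execCmd('uptime', ['ent1'])
if reply != '1':
  status, length, payload = reply.split('\n', 2)
  assert status == '0' and int(length) == len(payload)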
Example #30
 def execCmd(self, cmd, machines, logMsg=None):
     ''' Takes a command string and returns its output'''
     outputList = []
     retcode = E.execute(machines, cmd, outputList, 60)
     result = str(outputList)
     if retcode != 0:
         return '1'
     if logMsg:
         self.writeAdminRunnerOpMsg(logMsg)
     return '0\n%d\n%s' % (len(result), result)
Example #31
def set_install_state(machine, enterprise_home, to_status):
  """
  This function sets the status of the installed version
  to to_status
  """
  state_file_name = enterprise_home + "/STATE"
  cmd = "echo %(state)s > %(file)s && chmod 755 %(file)s && "\
  "chown nobody.nobody %(file)s" % {'state':to_status,
                                    'file':state_file_name}
  return E.ERR_OK == E.execute([machine], cmd, None, E.true)
Example #32
  def get_log_per_module(self, filename, module_name):
    '''Get the onebox module specific logging information.'''
    ent_config_type = self.cfg.getGlobalParam('ENT_CONFIG_TYPE')
    grep_string = '\"Query \\[\\|\\[%s\\]\"' % module_name

    if ent_config_type == 'CLUSTER':
      gfs_aliases = self.cfg.getGlobalParam('GFS_ALIASES')
      grep_command = 'fileutil --bnsresolver_use_svelte=false '\
                     '--gfs_aliases=%s cat %s | grep %s > %s' % (
                      gfs_aliases, filename, grep_string, self.TMP_LOG)
    else:
      grep_command = 'grep %s %s > %s' % (grep_string, filename, self.TMP_LOG)

    machine = E.getCrtHostName()
    E.execute([machine], grep_command, None, false)
    tmp_file = open(self.TMP_LOG, 'r')
    contents = tmp_file.read()
    tmp_file.close()
    os.remove(self.TMP_LOG)
    return contents
Example #33
    def getCount(self, command, machine):
        """ used for wc to return # of lines """
        result = []
        if E.ERR_OK != E.execute([machine], command, result, false):
            return -1

        # parse out the return code from the executed command
        try:
            return string.atoi(result[0])
        except:
            logging.error("Problem getting the count on %s" % command)
        return -1
Example #34
  def compile_synonyms(self, source, destination):
    """Execute the synonyms compiler and return error code."""

    # In the command line, we set the maximum errors to 1 so we can halt as
    # quickly as possible. The assumption is that the input files have already
    # been validated.
    commandline = \
    'make_custom_synonym_map --synonym_file=%s --output_syn_map=%s \
      --max_errors=1' % (source, destination)

    return E.execute(
      ['localhost'], commandline, None, QueryExpansionBase.COMPILER_TIMEOUT)
Example #35
    def compile_synonyms(self, source, destination):
        """Execute the synonyms compiler and return error code."""

        # In the command line, we set the maximum errors to 1 so we can halt as
        # quickly as possible. The assumption is that the input files have already
        # been validated.
        commandline = \
        'make_custom_synonym_map --synonym_file=%s --output_syn_map=%s \
      --max_errors=1' % (source, destination)

        return E.execute(['localhost'], commandline, None,
                         QueryExpansionBase.COMPILER_TIMEOUT)
Example #36
  def getCount(self, command, machine):
    """ used for wc to return # of lines """
    result = []
    if E.ERR_OK != E.execute([machine], command, result, false):
      return -1

    # parse out the return code from the executed command
    try:
      return string.atoi(result[0])
    except:
      logging.error("Problem getting the count on %s" % command)
    return -1
Example #37
    def rebootwholeappliance(self):
        #reboot after a minute. On a cluster, reboot all other nodes first.
        cmd = E.nonblocking_cmd('shutdown -r +1 &')
        machines = self.cfg.getGlobalParam("MACHINES")
        logging.info("List of machines : %s" % machines)
        my_hostname = []
        out = []
        E.execute(['localhost'], 'hostname', my_hostname, E.true, 1, 0,
                  self.cfg.getGlobalParam('ENTERPRISE_HOME'))
        logging.info('My hostname is : ' + str(my_hostname[0]))
        #shallow copy is okay because list is not nested
        machines_other = machines[:]
        machines_other.remove(my_hostname[0])
        logging.info(
            'Removed myself from the list of nodes because I will reboot myself at the end. List now is : '
            + str(machines_other))
        if len(machines_other) > 0:
            E.execute(machines_other, cmd, out, E.true, 1, 0,
                      self.cfg.getGlobalParam('ENTERPRISE_HOME'))
            logging.info('Output of command is : ' + str(out))
            out = []
        else:
            logging.info('There are no other nodes. This must be a oneway')

        logging.info('Running the command on myself : ' + str(my_hostname))
        E.execute(my_hostname, cmd, out, E.true, 1, 0,
                  self.cfg.getGlobalParam('ENTERPRISE_HOME'))
        logging.info('Output of command is : ' + str(out))

        return 0
Example #38
  def rebootwholeappliance(self):
    #reboot after a minute. On a cluster, reboot all other nodes first.
    cmd = E.nonblocking_cmd('shutdown -r +1 &')
    machines = self.cfg.getGlobalParam("MACHINES")
    logging.info("List of machines : %s" % machines)
    my_hostname=[]
    out = []
    E.execute(['localhost'],'hostname',my_hostname,E.true,1,0,self.cfg.getGlobalParam('ENTERPRISE_HOME'))
    logging.info('My hostname is : '+ str(my_hostname[0]) )
    #shallow copy is okay because list is not nested
    machines_other=machines[:]
    machines_other.remove(my_hostname[0])
    logging.info('Removed myself from the list of nodes because I will reboot myself at the end. List now is : '+ str(machines_other) )
    if len(machines_other) > 0 :
      E.execute(machines_other, cmd, out, E.true, 1, 0,self.cfg.getGlobalParam('ENTERPRISE_HOME'))
      logging.info('Output of command is : '+ str(out) )
      out = []
    else:
      logging.info('There are no other nodes. This must be a oneway')

    logging.info('Running the command on myself : '+ str(my_hostname) )
    E.execute(my_hostname, cmd, out, E.true, 1, 0,self.cfg.getGlobalParam('ENTERPRISE_HOME'))
    logging.info('Output of command is : '+ str(out) )

    return 0
Example #39
 def send_urlscheduler_command(self, command):
   schedulers = self.cfg.globalParams.GetServerHostPorts("urlscheduler")
   timeout = 60 # 1 minute is good enough
   for (machine, port) in schedulers:
     port_talker_cmd = ". %s; cd %s/local/google3/enterprise/legacy/util && "\
           "./port_talker.py %s %d 'GET /run?Flags=%s\r\n\r\n' %d" % (
       self.cfg.getGlobalParam('ENTERPRISE_BASHRC'),
       self.cfg.entHome,
       machine, port, command, timeout)
     if E.ERR_OK != E.execute([E.getCrtHostName()], port_talker_cmd, None, 0):
       logging.error("Error talking to urlscheduler %s:%d" % (machine, port))
       return 1
   return 0
Example #40
 def send_cmd(self, server_name, cmd):
   srvrs = self.cfg.globalParams.GetServerManager().Set(server_name).Servers()
   for srvr in srvrs:
     actual_cmd = ". %s; cd %s/local/google3/enterprise/legacy/util && "\
                  "./port_talker.py %s %d '%s' %d" % (
                  self.cfg.getGlobalParam('ENTERPRISE_BASHRC'),
                  self.cfg.entHome,
                  srvr.host(), srvr.port(), cmd, 60) # 1 min timeout
     err = E.execute([E.getCrtHostName()], actual_cmd, None, 0)
     if E.ERR_OK != err:
       logging.error("Error talking to server at %s:%d" % (srvr.host(),
                                                           srvr.port()))
   return true
Example #41
 def send_cmd(self, server_name, cmd):
     srvrs = self.cfg.globalParams.GetServerManager().Set(
         server_name).Servers()
     for srvr in srvrs:
         actual_cmd = ". %s; cd %s/local/google3/enterprise/legacy/util && "\
                      "./port_talker.py %s %d '%s' %d" % (
                      self.cfg.getGlobalParam('ENTERPRISE_BASHRC'),
                      self.cfg.entHome,
                      srvr.host(), srvr.port(), cmd, 60) # 1 min timeout
         err = E.execute([E.getCrtHostName()], actual_cmd, None, 0)
         if E.ERR_OK != err:
             logging.error("Error talking to server at %s:%d" %
                           (srvr.host(), srvr.port()))
     return true
Example #42
 def send_urlmanager_command(self, command):
   ret = 0
   urlmanagers = self.cfg.globalParams.GetServerHostPorts("urlmanager")
   for (host, port) in urlmanagers:
     # We don't do it here directly because of the timeout
     cmd = ". %s; cd %s/local/google3/enterprise/legacy/util && "\
           "./port_talker.py %s %d %s %d" % (
         self.cfg.getGlobalParam('ENTERPRISE_BASHRC'),
         self.cfg.entHome,
         host, port,
         commands.mkarg(command), 300)
     err = E.execute([E.getCrtHostName()], cmd, None, 0)
     if E.ERR_OK != err:
       ret = 1
   return ret
Example #43
  def SVSErrorsStatus(self, lockserv_cmd_out=None):
    """ Check SVS errors recorded by gsa-master

    Args:
      lockserv_cmd_out: {'ent1': 'machine problem Unknown\n'}
        (for unit test only)
    Return: status, desc (e.g. 0, []).
            status is 1 if there are SVS errors. Otherwise, status is 0.
    """

    # Add any SVS errors (from gsa-master) to the problem list
    all_machs_status = 0
    desc = []
    if self._ent_config == 'CLUSTER':
      if lockserv_cmd_out is None:
        version = self._cfg.getGlobalParam('VERSION')
        lockserv_cmd_prefix = core_utils.GetLSClientCmd(version,
          install_utilities.is_test(version))
      for machine in self._live_machines:
        if lockserv_cmd_out is None:
          chubby_file = '/ls/%s/svs_%s' % (core_utils.GetCellName(version),
                                           machine)
          lockserv_cmd = '%s cat %s' % (lockserv_cmd_prefix, chubby_file)
          out = []
          lockserv_status = E.execute(['localhost'], lockserv_cmd, out, 60)
        else:
          lockserv_status = 0
          if machine in lockserv_cmd_out:
            out = [lockserv_cmd_out[machine]]
          else:
            out = []
        if lockserv_status == 0 and len(out) > 0 and out[0] != '':
          errors = out[0].splitlines()
          status = 0
          for i in range(0, len(errors)):
            if (errors[i].find('unrecoverable error') >= 0 or
                errors[i].find('file system error') >= 0):
              errors[i] = '' # Ignore this error
            else:
              status = 1 # Show an error
          if status:
            # A svs error has been recorded
            all_machs_status = max(all_machs_status, status)
            errors = [e for e in errors if e != '']
            # add machine name
            desc.append('%s: %s' % (machine, ' '.join(errors)))
    return all_machs_status, desc
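As the docstring notes, lockserv_cmd_out lets a unit test bypass the lockserv call; a hedged sketch follows, assuming checker is an instance of the enclosing class with _ent_config set to 'CLUSTER' and 'ent1' among its _live_machines.

status, desc = checker.SVSErrorsStatus(
    lockserv_cmd_out={'ent1': 'machine problem Unknown\n'})
# The message matches neither 'unrecoverable error' nor 'file system error',
# so it is reported: status == 1 and desc == ['ent1: machine problem Unknown'].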
Example #44
def IsGFSRunning(ver, testver):
    """ Check whether a gfs_master process is running on any node.
  Arguments:
    ver:     '4.6.5'
    testver: 0 - not a test version. 1 - test version.
  Returns:
    1 - gfs_master processes running on some nodes. 0 - otherwise.
  """

    gfs_master_nodes, _ = core_utils.GFSMasterNodes()
    list_gfs_master_cmd = "lsof -i :%s " % core_utils.GetGFSMasterPort(testver)
    out = []
    enthome = '/export/hda3/%s' % ver
    status = E.execute(gfs_master_nodes, list_gfs_master_cmd, out, 300, 1, 0,
                       enthome)
    # ignore status for list
    return ''.join(out).find('LISTEN') != -1
Example #45
def IsGFSRunning(ver, testver):
  """ Check whether a gfs_master process is running on any node.
  Arguments:
    ver:     '4.6.5'
    testver: 0 - not a test version. 1 - test version.
  Returns:
    1 - gfs_master processes running on some nodes. 0 - otherwise.
  """

  gfs_master_nodes, _ = core_utils.GFSMasterNodes()
  list_gfs_master_cmd = "lsof -i :%s " % core_utils.GetGFSMasterPort(testver)
  out = []
  enthome = '/export/hda3/%s' % ver
  status = E.execute(gfs_master_nodes, list_gfs_master_cmd, out, 300, 1, 0,
                     enthome)
  # ignore status for list
  return ''.join(out).find('LISTEN') != -1
Example #46
  def importkey(self, keyBody):
    """ Takes a private key file body as the input, and saves it
    as the staging key
    returns 0 on success, or 1 on failure
    """

    retval = 0
    self.updatelock.acquire()
    try:
      try:
        open(ssl_cert.STAGINGKEY_FILENAME %
             self.cfg.getGlobalParam("ENTERPRISE_HOME"), 'w').write(keyBody)
      except IOError:
        retval = 1
        logging.error("Couldn't save key to [%s]" %
                      (ssl_cert.STAGINGKEY_FILENAME %
                       self.cfg.getGlobalParam("ENTERPRISE_HOME")))

      if retval == 0:
        # check the key
        if keyBody:
          verifycmd = "secure_script_wrapper -p2 %s verifystagingkey %s" % (
            self.sslWrapperPath, self.cfg.getGlobalParam("ENTERPRISE_HOME") )
          outputList = []
          verifycode = E.execute(['localhost'], verifycmd, outputList, 60)
        else:
          verifycode = 0

        if verifycode != 0:
          retval = 1
          E.rm(['localhost'],
               ssl_cert.STAGINGKEY_FILENAME %
               self.cfg.getGlobalParam("ENTERPRISE_HOME"))
          logging.error("Couldn't verify key [%s]; error code: %d" %
                        (str(outputList), verifycode) )


    finally:
      self.updatelock.release()

    return "%d" % retval
Example #47
 def run(self):
   db_handler = self.db_handler
   # TODO(dcz): i18n
   db_handler.writeAdminRunnerOpMsg('database sync started on source %s' %
                                    self.source)
   out_lines = []
   try:
     rtn_code = E.execute(['localhost'], self.cmd, out_lines, 0)
     if rtn_code == 0:
       # TODO(dcz): i18n
       db_handler.writeAdminRunnerOpMsg(
         'database sync succeeded on source %s' % self.source)
       db_handler.distributedbfiles(self.log)
       db_handler.distributedbfiles(self.last)
     else:
       # TODO(dcz): i18n
       db_handler.writeAdminRunnerOpMsg(
         'database sync on source %s failed with status %d' %
         (self.source, rtn_code))
   finally:
     # clean-up global dict
     del self.group[self.source]
Example #48
  def generatekey(self):
    """ creates a randomly generated staging key; returns 0 on success, 1 on
    failure """

    self.updatelock.acquire()
    try:

      cmd = "secure_script_wrapper -p2 %s generatekey %s" % (
        self.sslWrapperPath, self.cfg.getGlobalParam("ENTERPRISE_HOME"))

      outputList = []

      retcode = E.execute(['localhost'], cmd, outputList, 60)

      # if the command failed, we don't want to leave a malformed key around
      if retcode != 0:
        E.rm(['localhost'], ssl_cert.STAGINGKEY_FILENAME %
             self.cfg.getGlobalParam("ENTERPRISE_HOME"))
        logging.error("Couldn't generate private key: %s" %str(outputList))
    finally:
      self.updatelock.release()

    return "%d" % retcode
Example #49
def ExecuteWrapper(machines, commandLine, out, alarm, verbose = 1,
                   forceRemote = 0, enthome=None, num_tries=1):
  """Thin wrapper over E.execute as we need process's return code (parameter to
  exit()) and E.execute returns the exit status. It is too late to modify
  E.execute() itself, as a lot of code already calls it and we can't confirm
  whether any of it relies on E.execute returning the exit status instead of
  the return code. Refer to E.execute() for its documentation."""
  ret = 0
  for trial in range(num_tries):
    ret = E.execute(machines, commandLine, out,
                    alarm, verbose, forceRemote, enthome)
    if os.WIFEXITED(ret):
      # child process exit codes are multiplied by 256, so undo this
      ret = os.WEXITSTATUS(ret)
    if ret == 0 or (trial + 1) == num_tries:
      # either we succeed or this was the last try (there is no point in
      # sleeping after the last try)
      break
    logging.warn('%d: Execution of %s failed. Sleeping for 5 seconds...' %
                 (trial, commandLine))
    time.sleep(5)
  return ret
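The WIFEXITED/WEXITSTATUS handling above relies on the POSIX wait-status encoding, where a normal exit code N is returned in the high byte of the status; a small standard-library-only illustration:

import os

status = 2 << 8                     # wait status for a child that exited with code 2
assert os.WIFEXITED(status)         # normal termination, no signal bits set
assert os.WEXITSTATUS(status) == 2  # recovers the original exit code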
Example #50
  def setcert(self, certBody):
    """ Takes a cert file body as the input, and saves it
    as the staging certificate
    returns 0 on success, or 1 on failure
    """

    retval = 0
    self.updatelock.acquire()
    try:
      try:
        open(ssl_cert.STAGINGCERT_FILENAME %
             self.cfg.getGlobalParam("ENTERPRISE_HOME"), 'w').write(certBody)
      except IOError:
        retval = 1
        logging.error("Couldn't save certificate to [%s]" %
                      (ssl_cert.STAGINGCERT_FILENAME %
                       self.cfg.getGlobalParam("ENTERPRISE_HOME")))

      if retval == 0:
        verifycmd = "secure_script_wrapper -p2 %s verifystagingcert %s" % (
          self.sslWrapperPath, self.cfg.getGlobalParam("ENTERPRISE_HOME") )
        outputList = []
        verifycode = E.execute(['localhost'], verifycmd, outputList, 60)

        if verifycode != 0:
          retval = 1
          E.rm(['localhost'],
               ssl_cert.STAGINGCERT_FILENAME %
               self.cfg.getGlobalParam("ENTERPRISE_HOME"))
          logging.error("Couldn't verify certificate [%s]; error code: %d" %
                        (str(outputList), verifycode) )


    finally:
      self.updatelock.release()

    return "%d" % retval
Example #51
def EnsureGFSMasterRunning(ver, testver):
    """ check borgmon alert to see if there is a primary gfs master.
  If not, try to diagnose and handle the problem following the
  instructions described in http://www.corp.google.com/~bhmarks/redStone.html

  Arguments:
    ver:     '4.6.5'
    testver: 0 - not a test version. 1 - test version.
  Returns:
    error messages - if there is an error. None - otherwise
  """

    if (CheckNoMasterBorgmonAlert(ver, testver)
            or CheckNoMasterUsingElectionStatus(ver, testver)):
        # find out the node that is supposed to be the primary master
        master_node = GFSMasterLockHolder(ver, testver)
        handle_gfs_no_master_cmd = ("/export/hda3/%s/local/google3/enterprise/"
                                    "core/handle_gfs_no_master.py %s %s" %
                                    (ver, ver, testver))
        out = []
        status = E.execute([master_node], handle_gfs_no_master_cmd, out, 1200)
        if status:
            return ''.join(out)
    return None
Example #52
def _ExecuteCommand(cmd,
                    machines=['localhost'],
                    out=None,
                    timeout=15 * 60,
                    num_tries=2,
                    error_filter=None):
    """ Helper function to execute a command multiple times until
  it succeeds.

  Input: cmd: command to run
  timeout: seconds to allow command to run
  machines: machine list on which to execute
  out: list of output lines
  num_tries: number of times to retry command
  error_filter: A function that is called if the cmd execution failed.  It
    takes a list of output lines as input, can filter the errors, and decides
    if the execution counts as successful.  Returns 0 if execution succeeded,
    1 if the execution failed.
  Output: 0 for success, 1 for failure
  """
    if out == None:
        out = []
    for i in range(0, num_tries):
        if E.execute(machines, cmd, out, timeout) == 0:
            # Command was successful
            return 0
        # Execution failed
        if error_filter and error_filter(out) == 0:
            # Error filter says error is ok, execution counts as success
            logging.error('Cmd %s ignoring error: %s' % (cmd, ''.join(out)))
            return 0
        if i < num_tries - 1:
            logging.error('Cmd %s error: %s, retrying.' % (cmd, ''.join(out)))
        else:
            logging.error('Cmd %s error: %s, failing.' % (cmd, ''.join(out)))
    return 1
Example #53
        sys.exit("Unable to load config file  %s" % (cp.GetConfigFileName()))

    use_invalid_config = ""
    if FLAGS.useinvalidconfig:
        use_invalid_config = "--useinvalidconfig"

    python2_invoke_string = ""
    if FLAGS.use_python2:
        # we want to invoke using python2
        python2_invoke_string = "python2 "

    action = "--start=all"
    if FLAGS.kill_only:
        # we only want to kill and not restart
        action = "--kill=all"

    cmd = ('. %s; cd %s/enterprise/legacy/production/babysitter; '
           ' %s./babysitter.py --batch %s %s --ports=%s --babyalias=localhost '
           '--lockdir=%s/tmp --mailto= --mach=%s %s' %
           (cp.var("BASHRC_FILE"), cp.var("MAIN_GOOGLE3_DIR"),
            python2_invoke_string, action, use_invalid_config, FLAGS.port,
            cp.var("ENTERPRISE_HOME"), FLAGS.machine, cp.GetConfigFileName()))
    logging.info("Executing [%s]" % cmd)
    if E.ERR_OK != E.execute([E.getCrtHostName()], cmd, None, true):
        sys.exit("Error restarting server at %s:%s" %
                 (FLAGS.machine, FLAGS.port))


if __name__ == '__main__':
    main(sys.argv)