Exemplo n.º 1
0
def _RunServeCmd(cfg, version, cmd, allnodes=0):
  """Run serve_service.py for `version` through SECURE_WRAPPER_COMMAND.

  Args:
    cfg: config object; ENTERPRISE_HOME (and C.MACHINES when allnodes is
      set) are read through getGlobalParam().
    version: version string, used to build the path to serve_service.py.
    cmd: 'stop', 'start', 'activate' or 'deactivate'.
    allnodes: truthy to run on all machines, otherwise only on this host.

  Returns:
    0 if E.execute reported E.ERR_OK, 1 otherwise.
  """
  serve_service_cmd = (
      '/export/hda3/%s/local/google3/enterprise/legacy/scripts/'
      'serve_service.py %s %s' %
      (version, cfg.getGlobalParam('ENTERPRISE_HOME'), cmd))
  logging.info('Running: %s', serve_service_cmd)

  # Pick the target hosts: just this one unless the caller asked for all.
  if not allnodes:
    machines = [E.getCrtHostName()]
  else:
    machines = cfg.getGlobalParam(C.MACHINES)

  wrapped_cmd = SECURE_WRAPPER_COMMAND % (
      cfg.getGlobalParam('ENTERPRISE_HOME'), '-p2', serve_service_cmd)
  status = E.execute(machines, wrapped_cmd, None, 0)
  if status == E.ERR_OK:
    logging.info('%s: completed', serve_service_cmd)
    return 0

  logging.error('%s: failed', serve_service_cmd)
  return 1
def SyncOneboxLog(config):
  """Sync the local Onebox log file to its GFS copy (clusters only).

  Returns immediately unless ENT_CONFIG_TYPE is 'CLUSTER' and this host is
  the first machine listed for the 'oneboxenterprise' port. Otherwise it
  first tries a fileutil 'equalize' and, if that reports an error, falls
  back to copying the whole file with 'cp -f'. A failure of the copy is
  logged; nothing is returned.

  Args:
    config: configuration object providing SERVERS and var().
  """
  onebox_port = servertype.GetPortBase('oneboxenterprise')
  onebox_node = config.SERVERS[onebox_port]
  crt_machine = E.getCrtHostName()
  ent_config_type = config.var('ENT_CONFIG_TYPE')

  # Only the onebox host of a cluster has anything to sync.
  is_onebox_host = (ent_config_type == 'CLUSTER' and
                    crt_machine == onebox_node[0])
  if not is_onebox_host:
    return

  tmp_dir = config.var('TMPDIR')
  gfs_cell = config.var('GFS_CELL')
  local_log = os.path.join(tmp_dir, config.var('ENTERPRISE_ONEBOX_LOG'))
  gfs_log = os.path.join(os.sep, 'gfs', gfs_cell,
                         config.var('ENTERPRISE_ONEBOX_LOG'))

  # 'equalize' copies only the difference between the two files. When it
  # fails (the files did not match at the beginning, e.g. a new log file was
  # started), fall back to copying the whole file.
  fileutil_commands = (
      'equalize %s %s' % (local_log, gfs_log),
      'cp -f %s %s' % (local_log, gfs_log),
  )
  for fileutil_command in fileutil_commands:
    err, unused_out = E.run_fileutil_command(config, fileutil_command)
    if not err:
      return

  logging.error('Error while syncing onebox logs.')
Exemplo n.º 3
0
 def replicateConfig(self, machines):
     """Run replicate_config.py on each machine in `machines`.

     The command sources ENTERPRISE_BASHRC, changes into the legacy scripts
     directory under self.entHome and calls replicate_config.py with this
     host's name and the config file name.

     Args:
       machines: list of machine names to run the command on.

     Returns:
       Whatever E.execute returns for the command.
     """
     bashrc = self.getGlobalParam("ENTERPRISE_BASHRC")
     ent_home = self.entHome
     this_host = E.getCrtHostName()
     config_file = self.globalParams.GetConfigFileName()

     template = (". %s && cd %s/local/google3/enterprise/legacy/scripts && "
                 "./replicate_config.py %s %s")
     replicate_cmd = template % (bashrc, ent_home, this_host, config_file)
     return E.execute(machines, replicate_cmd, None, true)
Exemplo n.º 4
0
 def replicateConfig(self, machines):
   """Run replicate_config.py on each machine in `machines`.

   The command sources ENTERPRISE_BASHRC, changes into the legacy scripts
   directory under self.entHome and calls replicate_config.py with this
   host's name and the config file name.

   Args:
     machines: list of machine names to run the command on.

   Returns:
     Whatever E.execute returns for the command.
   """
   bashrc = self.getGlobalParam("ENTERPRISE_BASHRC")
   ent_home = self.entHome
   this_host = E.getCrtHostName()
   config_file = self.globalParams.GetConfigFileName()

   template = (". %s && cd %s/local/google3/enterprise/legacy/scripts && "
               "./replicate_config.py %s %s")
   replicate_cmd = template % (bashrc, ent_home, this_host, config_file)
   return E.execute(machines, replicate_cmd, None, true)
Exemplo n.º 5
0
def CollectLogs(all_machines, gws_log_dir, log_collect_dir):
  """Collect partnerlog.* files from all machines under log_collect_dir.

  Runs only on a single-machine install or on the cluster master. For the
  local machine a symlink <log_collect_dir>/<machine> -> gws_log_dir is
  created; for every other machine the files are rsynced over ssh into
  <log_collect_dir>/<machine>/. The work is guarded by the lock file
  <log_collect_dir>/lock, which is closed and removed before returning.

  Args:
    all_machines: list of machine names of the installation.
    gws_log_dir: directory that holds the partnerlog.* files on each machine.
    log_collect_dir: local directory that receives the per-machine entries.

  Returns:
    None. Failures for individual machines are logged and skipped.
  """
  # We only run this on oneway or master node of cluster.
  master = find_master.FindMaster(2100, all_machines)
  crt_machine = E.getCrtHostName()
  if len(all_machines) != 1 and (len(master) != 1 or master[0] != crt_machine):
    logging.info('Not a oneway or cluster master node. Return!')
    return

  lockfile = '%s/lock' % log_collect_dir
  # NOTE(review): the original comment said this waits up to 5 minutes for
  # the lock; how the value 30 maps to that depends on E.acquire_lock --
  # confirm.
  lock = E.acquire_lock(lockfile, 30, breakLockAfterGracePeriod = 0)
  if lock is None:
    logging.info('Cannot grab the lock. Return!')
    return

  # The existence checks below use os.path on this host, so the matching
  # rm/rmdir/ln must target this host too. On a cluster master this equals
  # `master`; on a oneway `master` is not guaranteed to be this host.
  local_host = [crt_machine]
  # Same pattern for every machine, so build it once.
  src_pattern = '%s/partnerlog.*' % gws_log_dir

  try:
    for machine in all_machines:
      dest_dir = '%s/%s' % (log_collect_dir, machine)

      # If it's a oneway or master node, we make a symlink to gws_log_dir
      # instead of rsync to log_collect directory
      if machine == crt_machine:
        # To stay backward compatible, remove an old dest_dir that is a real
        # directory: previous versions created a directory and rsynced files
        # into it even on the master node and on one-ways.
        if os.path.exists(dest_dir) and not os.path.islink(dest_dir):
          if (not E.rm(local_host, '%s/*' % dest_dir) or
              not E.rmdir(local_host, dest_dir)):
            logging.error('Directory %s exists and cannot be cleaned.',
                          dest_dir)
            continue
          logging.info('Cleaned existing directory %s.', dest_dir)

        if E.ln(local_host, gws_log_dir, dest_dir):
          logging.info('Symlink %s to directory %s:%s for logs',
                       dest_dir, machine, gws_log_dir)
        else:
          logging.error('Cannot make a symlink from %s to %s',
                        dest_dir, gws_log_dir)
        continue

      # For non-master nodes on cluster, rsync the files to the master node.
      logging.info('Collecting logs from %s:%s into %s',
                   machine, src_pattern, dest_dir)

      # make log directories if needed
      liblog.MakeDir(dest_dir)

      # rsync all files from one remote machine in one command.
      rsync_cmd = 'rsync --timeout=60 --size-only -vau ' \
                  ' -e ssh %s:%s %s/' % (machine, src_pattern, dest_dir)

      (status, output) = liblog.DoCommand(rsync_cmd)
      if status != 0:
        logging.error('Failed to collect logs from %s: %s', machine, output)
  finally:
    # Remove the lock file even if closing the lock handle raises.
    try:
      lock.close()
    finally:
      os.unlink(lockfile)
Exemplo n.º 6
0
def _RunServeCmd(cfg, version, cmd, allnodes=0):
    """Run serve_service.py for `version` through SECURE_WRAPPER_COMMAND.

    Args:
      cfg: config object; ENTERPRISE_HOME (and C.MACHINES when allnodes is
        set) are read through getGlobalParam().
      version: version string, used to build the path to serve_service.py.
      cmd: 'stop', 'start', 'activate' or 'deactivate'.
      allnodes: truthy to run on all machines, otherwise only on this host.

    Returns:
      0 if E.execute reported E.ERR_OK, 1 otherwise.
    """
    script_template = (
        '/export/hda3/%s/local/google3/enterprise/legacy/scripts/'
        'serve_service.py %s %s')
    serve_service_cmd = script_template % (
        version, cfg.getGlobalParam('ENTERPRISE_HOME'), cmd)
    logging.info('Running: %s', serve_service_cmd)

    # Pick the target hosts: just this one unless the caller asked for all.
    if not allnodes:
        machines = [E.getCrtHostName()]
    else:
        machines = cfg.getGlobalParam(C.MACHINES)

    wrapped_cmd = SECURE_WRAPPER_COMMAND % (
        cfg.getGlobalParam('ENTERPRISE_HOME'), '-p2', serve_service_cmd)
    status = E.execute(machines, wrapped_cmd, None, 0)
    if status == E.ERR_OK:
        logging.info('%s: completed', serve_service_cmd)
        return 0

    logging.error('%s: failed', serve_service_cmd)
    return 1
Exemplo n.º 7
0
    def browse(self, clientName, virtualFile, fromLine, toLine, grepString,
               fileArgs):
        """Return the lines of a file window that match a grep string.

        The window [fromLine, toLine] is cut out with head/tail and filtered
        with a case-insensitive fixed-string grep on this host.

        Args:
          clientName, virtualFile, fileArgs: passed to makePhysicalFile to
            obtain the file to read.
          fromLine: first line of the window, as a decimal string.
          toLine: last line of the window, as a decimal string.
          grepString: text to search for; it is stripped and URL-quoted
            (quote_plus) before use.

        Returns:
          "0" if the shell pipeline does not return E.ERR_OK, otherwise
          "<length of output>\\n<output>".

        Raises:
          ar_exception.ARException: if makePhysicalFile returns nothing.
        """
        pattern = urllib.quote_plus(grepString.strip())
        first_line = string.atoi(fromLine)
        last_line = string.atoi(toLine)

        location = self.makePhysicalFile(virtualFile, clientName,
                                         pattern, fileArgs)
        if not location:
            raise ar_exception.ARException(
                (ar_exception.LOADPARAMS, M.ERR_INVALIDFILESPECIFICATION))

        # Clamp the window to a non-negative size and recompute its end.
        window = max(last_line - first_line + 1, 0)
        last_line = first_line + window - 1

        output = []
        quoted_pattern = commands.mkarg(pattern)
        hosts = [E.getCrtHostName()]
        shell_cmd = ("head -n %s %s | tail -n %s | grep -i -F -- %s" %
                     (last_line, location, window, quoted_pattern))
        status = E.execute(hosts, shell_cmd, output, false)
        if status != E.ERR_OK:
            return "0"

        matched = output[0]
        return "%s\n%s" % (len(matched), matched)
Exemplo n.º 8
0
def SyncOpLogs(all_machines, log_dir):
  """Push the AdminRunner.OPERATOR.* logs from the master to other machines.

  Does nothing unless this host is the single master found by FindMaster.
  For every other machine the files are rsynced over ssh into the same
  log_dir on that machine. Each rsync is guarded by the lock file
  <log_dir>/syncops_lock; if the lock cannot be taken the function returns
  without processing the remaining machines.

  Args:
    all_machines: list of machine names of the installation.
    log_dir: directory holding the AdminRunner.OPERATOR.* files.

  Returns:
    None. rsync failures are logged.
  """
  # We have to run this only on master
  master = find_master.FindMaster(2100, all_machines)
  crt_machine = E.getCrtHostName()
  if len(master) != 1 or master[0] != crt_machine:
    return

  # Identical for every destination, so build them once.
  src_dir = '%s/AdminRunner.OPERATOR.*' % (log_dir)
  lockfile = '%s/syncops_lock' % log_dir

  for machine in all_machines:
    if machine == crt_machine:
      continue

    dest_dir = '%s:/%s' % (machine, log_dir)
    logging.info('Collecting operator logs from %s into %s',
                 src_dir, dest_dir)

    rsync_cmd = 'rsync --timeout=20 --size-only -vau ' \
                ' -e ssh %s %s/' % (src_dir, dest_dir)

    lock = E.acquire_lock(lockfile, 1, breakLockAfterGracePeriod = 0)
    if lock is None:
      logging.info('Cannot grab the lock. Return!')
      return

    try:
      (status, output) = liblog.DoCommand(rsync_cmd)
      if status != 0:
        # The rsync pushes to `machine`, so report it as the destination.
        logging.error('Failed to sync operator logs to %s: %s',
                      machine, output)
    finally:
      # Remove the lock file even if closing the lock handle raises.
      try:
        lock.close()
      finally:
        os.unlink(lockfile)
Exemplo n.º 9
0
  def browse(self, clientName, virtualFile, fromLine, toLine,
             grepString, fileArgs):
    """Return the lines of a file window that match a grep string.

    The window [fromLine, toLine] is cut out with head/tail and filtered
    with a case-insensitive fixed-string grep on this host.

    Args:
      clientName, virtualFile, fileArgs: passed to makePhysicalFile to
        obtain the file to read.
      fromLine: first line of the window, as a decimal string.
      toLine: last line of the window, as a decimal string.
      grepString: text to search for; it is stripped and URL-quoted
        (quote_plus) before use.

    Returns:
      "0" if the shell pipeline does not return E.ERR_OK, otherwise
      "<length of output>\\n<output>".

    Raises:
      ar_exception.ARException: if makePhysicalFile returns nothing.
    """
    pattern = urllib.quote_plus(grepString.strip())
    first_line = string.atoi(fromLine)
    last_line = string.atoi(toLine)

    location = self.makePhysicalFile(virtualFile, clientName, pattern,
                                     fileArgs)
    if not location:
      raise ar_exception.ARException((
        ar_exception.LOADPARAMS, M.ERR_INVALIDFILESPECIFICATION))

    # Clamp the window to a non-negative size and recompute its end.
    window = max(last_line - first_line + 1, 0)
    last_line = first_line + window - 1

    output = []
    quoted_pattern = commands.mkarg(pattern)
    hosts = [E.getCrtHostName()]
    shell_cmd = ("head -n %s %s | tail -n %s | grep -i -F -- %s" %
                 (last_line, location, window, quoted_pattern))
    status = E.execute(hosts, shell_cmd, output, false)
    if status != E.ERR_OK:
      return "0"

    matched = output[0]
    return "%s\n%s" % (len(matched), matched)
Exemplo n.º 10
0
  def drain_urlmanagers(self):
    """Have every urlmanager dump its status table and check the file exists.

    Per the original note on this method, it has to run before the epoch is
    advanced and may safely be run more than once.

    Returns:
      0 if every urlmanager accepted the command and its table file could
      be listed; 1 at the first failure.
    """
    global_params = self.cfg.globalParams
    urlmanagers = global_params.GetServerHostPorts("urlmanager")
    num_shards = global_params.GetNumShards('urlmanager')
    epoch = self.cfg.getGlobalParam('RT_EPOCH')
    drain_timeout = 300  # 5 min timeout

    for host, port in urlmanagers:
      # Original note: this is not done here directly because of the
      # timeout; port_talker.py is run as a command instead.
      drain_cmd = (
          ". %s; cd %s/local/google3/enterprise/legacy/util && "
          "./port_talker.py %s %d 'd DumpingStatusTable' %d" % (
              self.cfg.getGlobalParam('ENTERPRISE_BASHRC'),
              self.cfg.entHome,
              host, port, drain_timeout))
      err = E.execute([E.getCrtHostName()], drain_cmd, None, 0)
      if err != E.ERR_OK:
        logging.error("Error draining urlmanagers [%s]", err)
        return 1

      # Make sure that the status table file has been written.
      shard_num = servertype.GetPortShard(port)
      table_file = "%surlmanager_out_table_%02d_of_%02d_epoch%010d" % (
          self.cfg.getGlobalParam('NAMESPACE_PREFIX'),
          shard_num, num_shards, epoch)
      ls_err, unused_out = E.run_fileutil_command(
          self.cfg.globalParams, "ls %s" % table_file)
      if ls_err != E.ERR_OK:
        logging.error("The status table file [%s] is not there", table_file)
        return 1

    return 0
Exemplo n.º 11
0
def PeriodicScript(config):
    """Run one pass of the periodic master / non-master housekeeping.

    Detects the current master, restarts sshd if the liveness check failed,
    then behaves according to this machine's role:
      * master: babysit the admin loops if adminrunner answers, otherwise
        call BecomeMaster;
      * non-master: kill stray adminrunner-related processes and, unless
        MasterKillMyself was used, resync the configuration from the master.
    Finally, services this machine is not supposed to run are killed.

    Args:
      config: configuration object; MACHINES, ENTERPRISE_HOME,
        ENTERPRISE_USER, var() and GetConfigFileName() are used.

    Raises:
      Error: if DetectMaster returns None.
    """
    global apacheRecheck

    machines = config.MACHINES

    # The two determinants: if we run sshd and the list of active AdminRunners
    sshd_alive = sshd_alive_check(config)
    tries = 6
    sleep_time = 10
    master = DetectMaster(machines, tries, sleep_time, config.var('VERSION'))
    if master is None:
        # Call form of raise: the old "raise Error, msg" is Python-2-only.
        raise Error('Could not find master in %d tries.' % tries)

    logging.info('%s is the master', master)

    if not sshd_alive:
        # sshd is not running, so restart it.
        logging.error("sshd is not running on this machine. "
                      "restarting sshd.")
        os.system("/etc/rc.d/init.d/sshd restart >&/dev/null </dev/null")

    # The name of this machine
    crt_machine = E.getCrtHostName()
    if master == crt_machine:  # I am the master
        # make sure gfs master is not running on the same node on clusters
        if len(machines) > 1:
            AvoidGFSMasterOnNode(config, crt_machine)

        if IsAdminRunnerAlive(6, 10):  # max_tries, sleep_time
            # License stuff -- increase the serving time count
            IncreaseMasterCounter()
            # babysit loop_AdminRunner and loop_webserve_config
            StartAdminLoop(config, op='babysit')
        else:  # I am the master but adminrunner does not seem to be running
            # The two halves of this message used to be joined without a
            # space ("...seem to berunning.").
            logging.info(
                'I (%s) am the master but adminrunner does not seem to be '
                'running.', crt_machine)
            BecomeMaster(config)
    else:  # I am not the master
        resync_with_master = 1
        if core_utils.CanRunGSAMaster(crt_machine):
            if IsAdminRunnerAlive(1, 0):  # max_tries, sleep_time
                # Kills adminrunner related processes, webconfig.py
                # calls KillProcessIfNotMaster
                MasterKillMyself(config)
                resync_with_master = 0
            else:
                KillProcessIfNotMaster(config)
        if resync_with_master:
            ResyncWithMaster(config.GetConfigFileName(),
                             config.ENTERPRISE_HOME, config.ENTERPRISE_USER,
                             master)

    # check all services and kill services I am not supposed to run
    KillRedundantServices(config)
Exemplo n.º 12
0
def isMaster(config):
  """Tell whether this host is the master node.

  A single-machine install is always the master (returns 1). Otherwise the
  local GSA master port is asked 'v is_master'.

  Returns:
    1 for a single-machine install; the falsy status from port_talker.Talk
    if the query failed; otherwise whether the response starts with '1'.
  """
  machines = config.var('MACHINES')
  if len(machines) == 1:
    return 1

  status, response = port_talker.Talk(
      E.getCrtHostName(), core_utils.GSA_MASTER_PORT, 'v is_master', 5)
  if not status:
    return status
  return response[0] == '1'
Exemplo n.º 13
0
def isMaster(config):
    """Tell whether this host is the master node.

    A single-machine install is always the master (returns 1). Otherwise the
    local GSA master port is asked 'v is_master'.

    Returns:
      1 for a single-machine install; the falsy status from port_talker.Talk
      if the query failed; otherwise whether the response starts with '1'.
    """
    machines = config.var('MACHINES')
    if len(machines) == 1:
        return 1

    status, response = port_talker.Talk(
        E.getCrtHostName(), core_utils.GSA_MASTER_PORT, 'v is_master', 5)
    if not status:
        return status
    return response[0] == '1'
Exemplo n.º 14
0
    def remove(self, machine):
        """Remove a machine from the configuration.

        Stops the core services on the machine, halts it, drops it from the
        SERVERS / MACHINES parameters and from DATACHUNKDISKS, reallocates
        the machines, restarts the babysitter and crawl processes, and sends
        a notification mail unless one was already sent.

        Args:
          machine: name of the machine to remove.

        Returns:
          1 if the machine is unknown, is this host, or a configuration
          update fails; otherwise the result of self.halt(machine).
        """
        if machine not in self.cfg.getGlobalParam('MACHINES'):
            logging.error("%s doesn't exist", machine)
            return 1

        # Reject self-removal before any side effect. This check used to
        # come after stop_core, so a rejected request still stopped the core
        # services on this host.
        if machine == E.getCrtHostName():
            logging.error("Cannot remove self")
            return 1

        ver = self.cfg.getGlobalParam('VERSION')
        home = self.cfg.getGlobalParam('ENTERPRISE_HOME')
        testver = install_utilities.is_test(ver)
        # if possible stop the core services, ignore return code
        install_utilities.stop_core(ver, home, [machine])

        # Halt the machine if APC is used.
        error = self.halt(machine)

        self.cfg.globalParams.ReplaceVarInParam("SERVERS", None, machine)
        self.cfg.globalParams.ReplaceVarInParam("MACHINES", None, machine)
        ret = core_utils.AddDeadNode(ver, testver, machine)
        # remove the chunkserver running on the node
        gfs_utils.DeleteGFSChunkservers(ver, testver, [machine])
        if ret:
            logging.error('Cannot add dead node to the lockserver.')
            # we ignore this error for now

        # now we need to remove the data disks that were on this machine
        data_disks = self.cfg.globalParams.var_copy('DATACHUNKDISKS')
        if machine in data_disks:
            del data_disks[machine]
            if not self.cfg.setGlobalParam('DATACHUNKDISKS', data_disks):
                return 1

        # This also saves the config file
        if not self.cfg.DoMachineAllocation():
            return 1

        # Now we need to restart babysitter because the old one
        # is out of sync after this
        serve_service_cmd = (
            ". %s && "
            "cd %s/local/google3/enterprise/legacy/scripts && "
            "./serve_service.py %s" %
            (self.cfg.getGlobalParam('ENTERPRISE_BASHRC'),
             self.cfg.getGlobalParam('ENTERPRISE_HOME'),
             self.cfg.getGlobalParam('ENTERPRISE_HOME')))
        E.exe("%s %s" % (serve_service_cmd, "babysit"))

        self.restart_crawl_processes(serve_service_cmd)

        # Build the notification text once; it is both the dedup key and
        # the mail content.
        removed_msg = M.MSG_MACHINEREMOVED % machine
        if not mail_already_sent(removed_msg):
            SendMail.send(self.cfg, None, false, removed_msg, "", true)

        return error
Exemplo n.º 15
0
  def remove(self, machine):
    """Remove a machine from the configuration.

    Stops the core services on the machine, halts it, drops it from the
    SERVERS / MACHINES parameters and from DATACHUNKDISKS, reallocates the
    machines, restarts the babysitter and crawl processes, and sends a
    notification mail unless one was already sent.

    Args:
      machine: name of the machine to remove.

    Returns:
      1 if the machine is unknown, is this host, or a configuration update
      fails; otherwise the result of self.halt(machine).
    """
    if machine not in self.cfg.getGlobalParam('MACHINES'):
      logging.error("%s doesn't exist", machine)
      return 1

    # Reject self-removal before any side effect. This check used to come
    # after stop_core, so a rejected request still stopped the core services
    # on this host.
    if machine == E.getCrtHostName():
      logging.error("Cannot remove self")
      return 1

    ver = self.cfg.getGlobalParam('VERSION')
    home = self.cfg.getGlobalParam('ENTERPRISE_HOME')
    testver = install_utilities.is_test(ver)
    # if possible stop the core services, ignore return code
    install_utilities.stop_core(ver, home, [machine])

    # Halt the machine if APC is used.
    error = self.halt(machine)

    self.cfg.globalParams.ReplaceVarInParam("SERVERS", None, machine)
    self.cfg.globalParams.ReplaceVarInParam("MACHINES", None, machine)
    ret = core_utils.AddDeadNode(ver, testver, machine)
    # remove the chunkserver running on the node
    gfs_utils.DeleteGFSChunkservers(ver, testver, [machine])
    if ret:
      logging.error('Cannot add dead node to the lockserver.')
      # we ignore this error for now

    # now we need to remove the data disks that were on this machine
    data_disks = self.cfg.globalParams.var_copy('DATACHUNKDISKS')
    if machine in data_disks:
      del data_disks[machine]
      if not self.cfg.setGlobalParam('DATACHUNKDISKS', data_disks):
        return 1

    # This also saves the config file
    if not self.cfg.DoMachineAllocation():
      return 1

    # Now we need to restart babysitter because the old one
    # is out of sync after this
    serve_service_cmd = (". %s && "
        "cd %s/local/google3/enterprise/legacy/scripts && "
        "./serve_service.py %s" % (
          self.cfg.getGlobalParam('ENTERPRISE_BASHRC'),
          self.cfg.getGlobalParam('ENTERPRISE_HOME'),
          self.cfg.getGlobalParam('ENTERPRISE_HOME')))
    E.exe("%s %s" % (serve_service_cmd, "babysit"))

    self.restart_crawl_processes(serve_service_cmd)

    # Build the notification text once; it is both the dedup key and the
    # mail content.
    removed_msg = M.MSG_MACHINEREMOVED % machine
    if not mail_already_sent(removed_msg):
      SendMail.send(self.cfg, None, false, removed_msg, "", true)

    return error
Exemplo n.º 16
0
def PeriodicScript(config):
  """Run one pass of the periodic master / non-master housekeeping.

  Detects the current master, restarts sshd if the liveness check failed,
  then behaves according to this machine's role:
    * master: babysit the admin loops if adminrunner answers, otherwise
      call BecomeMaster;
    * non-master: kill stray adminrunner-related processes and, unless
      MasterKillMyself was used, resync the configuration from the master.
  Finally, services this machine is not supposed to run are killed.

  Args:
    config: configuration object; MACHINES, ENTERPRISE_HOME,
      ENTERPRISE_USER, var() and GetConfigFileName() are used.

  Raises:
    Error: if DetectMaster returns None.
  """
  global apacheRecheck

  machines = config.MACHINES

  # The two determinants: if we run sshd and the list of active AdminRunners
  sshd_alive = sshd_alive_check(config)
  tries = 6
  sleep_time = 10
  master = DetectMaster(machines, tries, sleep_time, config.var('VERSION'))
  if master is None:
    # Call form of raise: the old "raise Error, msg" is Python-2-only.
    raise Error('Could not find master in %d tries.' % tries)

  logging.info('%s is the master', master)

  if not sshd_alive:
    # sshd is not running, so restart it.
    logging.error("sshd is not running on this machine. "
                  "restarting sshd.")
    os.system("/etc/rc.d/init.d/sshd restart >&/dev/null </dev/null")

  # The name of this machine
  crt_machine = E.getCrtHostName()
  if master == crt_machine:  # I am the master
    # make sure gfs master is not running on the same node on clusters
    if len(machines) > 1:
      AvoidGFSMasterOnNode(config, crt_machine)

    if IsAdminRunnerAlive(6, 10):  # max_tries, sleep_time
      # License stuff -- increase the serving time count
      IncreaseMasterCounter()
      # babysit loop_AdminRunner and loop_webserve_config
      StartAdminLoop(config, op='babysit')
    else:  # I am the master but adminrunner does not seem to be running
      # The two halves of this message used to be joined without a space
      # ("...seem to berunning.").
      logging.info('I (%s) am the master but adminrunner does not seem to be '
                   'running.', crt_machine)
      BecomeMaster(config)
  else:   # I am not the master
    resync_with_master = 1
    if core_utils.CanRunGSAMaster(crt_machine):
      if IsAdminRunnerAlive(1, 0):   # max_tries, sleep_time
        # Kills adminrunner related processes, webconfig.py
        # calls KillProcessIfNotMaster
        MasterKillMyself(config)
        resync_with_master = 0
      else:
        KillProcessIfNotMaster(config)
    if resync_with_master:
      ResyncWithMaster(config.GetConfigFileName(), config.ENTERPRISE_HOME,
                       config.ENTERPRISE_USER, master)

  # check all services and kill services I am not supposed to run
  KillRedundantServices(config)
Exemplo n.º 17
0
    def export(self, clientName, virtualFile, convert, fileArgs):
        """Materialize a virtual file and report where it lives.

        Args:
          clientName, virtualFile, fileArgs: passed to makePhysicalFile
            (with an empty grep string).
          convert: not used by this method.

        Returns:
          "<this host name> <file location>".

        Raises:
          ar_exception.ARException: if makePhysicalFile returns nothing.
        """
        location = self.makePhysicalFile(virtualFile, clientName, "",
                                         fileArgs)
        if location:
            return "%s %s" % (E.getCrtHostName(), location)

        raise ar_exception.ARException(
            (ar_exception.LOADPARAMS, M.ERR_INVALIDFILESPECIFICATION))
Exemplo n.º 18
0
  def export(self, clientName, virtualFile, convert,
             fileArgs):
    """Materialize a virtual file and report where it lives.

    Args:
      clientName, virtualFile, fileArgs: passed to makePhysicalFile (with an
        empty grep string).
      convert: not used by this method.

    Returns:
      "<this host name> <file location>".

    Raises:
      ar_exception.ARException: if makePhysicalFile returns nothing.
    """
    location = self.makePhysicalFile(virtualFile, clientName, "", fileArgs)
    if location:
      return "%s %s" % (E.getCrtHostName(), location)

    raise ar_exception.ARException((
      ar_exception.LOADPARAMS, M.ERR_INVALIDFILESPECIFICATION))
Exemplo n.º 19
0
  def GetMachineParams(self, fact_name):
    """Look up a fact about this host in the machine parameter cache.

    Args:
      fact_name: name of the fact, e.g. 'memory-total'.

    Returns:
      What the cache's GetFact returns for this host, e.g. 12430580.
    """
    this_host = E.getCrtHostName()
    return self.mach_param_cache.GetFact(fact_name, this_host)
Exemplo n.º 20
0
    def GetMachineParams(self, fact_name):
        """Look up a fact about this host in the machine parameter cache.

        Args:
          fact_name: name of the fact, e.g. 'memory-total'.

        Returns:
          What the cache's GetFact returns for this host, e.g. 12430580.
        """
        this_host = E.getCrtHostName()
        return self.mach_param_cache.GetFact(fact_name, this_host)
Exemplo n.º 21
0
  def haltcluster(self):
    """Halt every other machine of the cluster, then this machine.

    Writes the shutdown message to the operator log, asks rebooter to halt
    all other machines, mails a notice when that call returns a truthy
    value, waits two minutes and finally halts this host.

    Returns:
      0, always.
    """
    this_host = E.getCrtHostName()
    # Filter into a new list instead of calling list.remove(): remove()
    # edits the list returned by getGlobalParam in place and raises
    # ValueError when this host is not in it.
    machines = [m for m in self.cfg.getGlobalParam("MACHINES")
                if m != this_host]

    msg = M.MSG_LOGSHUTDOWN
    self.writeAdminRunnerOpMsg(msg)
    if machines:
      # Halt all other machines now
      # NOTE(review): a truthy result presumably means some machines could
      # not be halted -- confirm against rebooter.HaltMachines.
      if rebooter.HaltMachines(self.cfg.globalParams.GetEntHome(),
                               machines):
        # just send an email
        notice = M.MSG_MACHINENEEDSHALT % " ".join(machines)
        if not mail_already_sent(notice):
          SendMail.send(self.cfg, None, false, notice, "", true)
      # Halt this machine after a delay
      time.sleep(120)
    rebooter.HaltMachines(self.cfg.globalParams.GetEntHome(), [this_host])
    return 0
Exemplo n.º 22
0
 def send_urlscheduler_command(self, command):
   """Send `command` as a /run?Flags= request to every urlscheduler.

   The request is issued by running port_talker.py on this host for each
   (machine, port) of the "urlscheduler" server type.

   Args:
     command: text placed after 'Flags=' in the GET request.

   Returns:
     0 if every urlscheduler was reached, 1 at the first failure.
   """
   schedulers = self.cfg.globalParams.GetServerHostPorts("urlscheduler")
   timeout = 60 # 1 minute is good enough
   cmd_template = (
       ". %s; cd %s/local/google3/enterprise/legacy/util && "
       "./port_talker.py %s %d 'GET /run?Flags=%s\r\n\r\n' %d")

   for machine, port in schedulers:
     port_talker_cmd = cmd_template % (
         self.cfg.getGlobalParam('ENTERPRISE_BASHRC'),
         self.cfg.entHome,
         machine, port, command, timeout)
     status = E.execute([E.getCrtHostName()], port_talker_cmd, None, 0)
     if status != E.ERR_OK:
       logging.error("Error talking to urlscheduler %s:%d", machine, port)
       return 1

   return 0
Exemplo n.º 23
0
 def send_cmd(self, server_name, cmd):
   """Send `cmd` to every server of the named server set.

   Each server is contacted by running port_talker.py on this host. A
   failure is logged and the remaining servers are still tried.

   Args:
     server_name: name of the server set to look up in the server manager.
     cmd: command text handed to port_talker.py inside single quotes.

   Returns:
     true, always.
   """
   server_set = self.cfg.globalParams.GetServerManager().Set(server_name)
   timeout_secs = 60  # 1 min timeout
   cmd_template = (". %s; cd %s/local/google3/enterprise/legacy/util && "
                   "./port_talker.py %s %d '%s' %d")

   for srvr in server_set.Servers():
     actual_cmd = cmd_template % (
         self.cfg.getGlobalParam('ENTERPRISE_BASHRC'),
         self.cfg.entHome,
         srvr.host(), srvr.port(), cmd, timeout_secs)
     err = E.execute([E.getCrtHostName()], actual_cmd, None, 0)
     if err != E.ERR_OK:
       logging.error("Error talking to server at %s:%d",
                     srvr.host(), srvr.port())

   return true
Exemplo n.º 24
0
    def haltcluster(self):
        """Halt every other machine of the cluster, then this machine.

        Writes the shutdown message to the operator log, asks rebooter to
        halt all other machines, mails a notice when that call returns a
        truthy value, waits two minutes and finally halts this host.

        Returns:
          0, always.
        """
        this_host = E.getCrtHostName()
        # Filter into a new list instead of calling list.remove(): remove()
        # edits the list returned by getGlobalParam in place and raises
        # ValueError when this host is not in it.
        machines = [m for m in self.cfg.getGlobalParam("MACHINES")
                    if m != this_host]

        msg = M.MSG_LOGSHUTDOWN
        self.writeAdminRunnerOpMsg(msg)
        if machines:
            # Halt all other machines now
            # NOTE(review): a truthy result presumably means some machines
            # could not be halted -- confirm against rebooter.HaltMachines.
            if rebooter.HaltMachines(self.cfg.globalParams.GetEntHome(),
                                     machines):
                # just send an email
                notice = M.MSG_MACHINENEEDSHALT % " ".join(machines)
                if not mail_already_sent(notice):
                    SendMail.send(self.cfg, None, false, notice, "", true)
            # Halt this machine after a delay
            time.sleep(120)
        rebooter.HaltMachines(self.cfg.globalParams.GetEntHome(),
                              [this_host])
        return 0
Exemplo n.º 25
0
def main(argv):
  """Collect, partition and sync the logs for the install given in argv[0].

  Exits early (via sys.exit) when the config cannot be loaded, when the
  install state is neither 'ACTIVE' nor 'SERVE', when SITESEARCH_INTERFACE
  is set, or when this host is not listed in CONFIG_REPLICAS.

  Args:
    argv: argument list; argv[0] is handed to entconfig.EntConfig.
  """
  pywrapfile.File.Init()
  config = entconfig.EntConfig(argv[0])
  if not config.Load():
    sys.exit(__doc__)

  # Collect logs only if active
  state = install_utilities.install_state(config.var('VERSION'))
  if state not in ('ACTIVE', 'SERVE'):
    sys.exit(0)

  # NO collection for sitesearches:
  if config.var('SITESEARCH_INTERFACE'):
    sys.exit(0)

  # Only config replicas collect the logs.
  replicas = config.var('CONFIG_REPLICAS')
  crt_machine = E.getCrtHostName()
  if crt_machine not in replicas:
    logging.error('Not a replica')
    sys.exit(0)

  gws_log_dir = liblog.get_gws_log_dir(config)
  collect_dir = liblog.get_collect_dir(config)
  partition_dir = liblog.get_partition_dir(config)
  apache_dir = liblog.get_apache_dir(config)
  click_dir = liblog.get_click_dir(config)
  directory_map_file = liblog.get_directory_map_file(config)

  # Create the working directories, in case the cron job starts before
  # adminrunner.
  for needed_dir in (collect_dir, partition_dir, apache_dir, click_dir):
    liblog.MakeDir(needed_dir)

  # Collect Logs from machines
  all_machines = config.var('MACHINES')
  CollectLogs(all_machines, gws_log_dir, collect_dir)

  # Partition gwslogs by collections and convert to apache logs
  preprocess_logs.PartitionLogs(config)

  # Sanitize collection directory map; save it only if sanitizeMap returns
  # a truthy value.
  coll_directory_map = liblog.CollectionDirectoryMap(directory_map_file)
  map_changed = coll_directory_map.sanitizeMap(partition_dir, apache_dir,
                                               click_dir)
  if map_changed:
    coll_directory_map.saveToDisk()

  # Send the OP logs to all machines
  SyncOpLogs(all_machines, config.var('LOGDIR'))

  logging.info('Done')
Exemplo n.º 26
0
def RebootMachine(enthome, machine):
  """Reboot a machine: software shutdown first, then an APC reboot.

  If `machine` is this host, '/sbin/shutdown -r now' is run locally;
  otherwise '/sbin/shutdown -h now &' is run on the remote machine. After
  60 seconds the APC reboot command is sent in either case.

  Args:
    enthome: passed through to SendAPCCommand.
    machine: name of the machine to reboot.

  Returns:
    The result of SendAPCCommand(enthome, machine, APC_REBOOT).
  """
  rebooting_self = (machine == E.getCrtHostName())
  if not rebooting_self:
    # Try shutting down cleanly first
    logging.info('Shutting down %s', machine)
    E.execute([machine], '/sbin/shutdown -h now &', None, 1)
  else:
    # Rebooting ourself; if we are still alive after a minute the APC
    # command below takes over.
    logging.info('Rebooting %s', machine)
    E.execute([E.LOCALHOST], '/sbin/shutdown -r now', None, 1)

  time.sleep(60)
  logging.info('Rebooting %s via APC', machine)
  return SendAPCCommand(enthome, machine, APC_REBOOT)
Exemplo n.º 27
0
 def send_cmd(self, server_name, cmd):
     """Send `cmd` to every server of the named server set.

     Each server is contacted by running port_talker.py on this host. A
     failure is logged and the remaining servers are still tried.

     Args:
       server_name: name of the server set to look up in the server manager.
       cmd: command text handed to port_talker.py inside single quotes.

     Returns:
       true, always.
     """
     server_set = self.cfg.globalParams.GetServerManager().Set(server_name)
     timeout_secs = 60  # 1 min timeout
     cmd_template = (". %s; cd %s/local/google3/enterprise/legacy/util && "
                     "./port_talker.py %s %d '%s' %d")

     for srvr in server_set.Servers():
         actual_cmd = cmd_template % (
             self.cfg.getGlobalParam('ENTERPRISE_BASHRC'),
             self.cfg.entHome,
             srvr.host(), srvr.port(), cmd, timeout_secs)
         err = E.execute([E.getCrtHostName()], actual_cmd, None, 0)
         if err != E.ERR_OK:
             logging.error("Error talking to server at %s:%d",
                           srvr.host(), srvr.port())

     return true
Exemplo n.º 28
0
def RebootMachine(enthome, machine):
    """Reboot a machine: software shutdown first, then an APC reboot.

    If `machine` is this host, '/sbin/shutdown -r now' is run locally;
    otherwise '/sbin/shutdown -h now &' is run on the remote machine. After
    60 seconds the APC reboot command is sent in either case.

    Args:
      enthome: passed through to SendAPCCommand.
      machine: name of the machine to reboot.

    Returns:
      The result of SendAPCCommand(enthome, machine, APC_REBOOT).
    """
    rebooting_self = (machine == E.getCrtHostName())
    if not rebooting_self:
        # Try shutting down cleanly first
        logging.info('Shutting down %s', machine)
        E.execute([machine], '/sbin/shutdown -h now &', None, 1)
    else:
        # Rebooting ourself; if we are still alive after a minute the APC
        # command below takes over.
        logging.info('Rebooting %s', machine)
        E.execute([E.LOCALHOST], '/sbin/shutdown -r now', None, 1)

    time.sleep(60)
    logging.info('Rebooting %s via APC', machine)
    return SendAPCCommand(enthome, machine, APC_REBOOT)
Exemplo n.º 29
0
 def send_urlmanager_command(self, command):
   """Send `command` to every urlmanager through port_talker.py.

   All urlmanagers are tried even if an earlier one fails.

   Args:
     command: command text; it is shell-quoted with commands.mkarg.

   Returns:
     0 if every command returned E.ERR_OK, 1 if at least one did not.
   """
   failed = 0
   timeout_secs = 300
   cmd_template = (". %s; cd %s/local/google3/enterprise/legacy/util && "
                   "./port_talker.py %s %d %s %d")

   for host, port in self.cfg.globalParams.GetServerHostPorts("urlmanager"):
     # Original note: this is not done here directly because of the
     # timeout; port_talker.py is run as a command instead.
     talker_cmd = cmd_template % (
         self.cfg.getGlobalParam('ENTERPRISE_BASHRC'),
         self.cfg.entHome,
         host, port,
         commands.mkarg(command), timeout_secs)
     status = E.execute([E.getCrtHostName()], talker_cmd, None, 0)
     if status != E.ERR_OK:
       failed = 1

   return failed
Exemplo n.º 30
0
def main():
  """Restart SVS on a machine; a no-op on the GSA-Lite.

  Command line: <enthome> [machine]. Without a machine argument this
  host's name is used.

  Returns:
    0 on the LITE config type or after restart_svs was called, 1 if the
    configuration could not be loaded.
  """
  # on the GSA-Lite, there is no SVS, so just return
  if core_utils.GetEntConfigVar('ENT_CONFIG_TYPE') == 'LITE':
    return 0

  enthome = sys.argv[1]
  if len(sys.argv) <= 2:
    machine = E.getCrtHostName()
  else:
    machine = sys.argv[2]

  cfg = entconfig.EntConfig(enthome)
  if cfg.Load():
    restart_svs(machine)
    return 0
  return 1
Exemplo n.º 31
0
def main():
    """Restart SVS on a machine; a no-op on the GSA-Lite.

    Command line: <enthome> [machine]. Without a machine argument this
    host's name is used.

    Returns:
      0 on the LITE config type or after restart_svs was called, 1 if the
      configuration could not be loaded.
    """
    # on the GSA-Lite, there is no SVS, so just return
    if core_utils.GetEntConfigVar('ENT_CONFIG_TYPE') == 'LITE':
        return 0

    enthome = sys.argv[1]
    if len(sys.argv) <= 2:
        machine = E.getCrtHostName()
    else:
        machine = sys.argv[2]

    cfg = entconfig.EntConfig(enthome)
    if cfg.Load():
        restart_svs(machine)
        return 0
    return 1
Exemplo n.º 32
0
def KillRedundantServices(config):
    """Kill servers that answer on localhost but are not assigned to it.

    For every port in config.SERVERS whose machine list does not contain
    this host, the port is probed on localhost; if the first element of
    the probe result is truthy the matching server type is killed.

    Args:
      config: configuration object providing SERVERS (port -> machines).
    """
    crt_machine = E.getCrtHostName()

    for server_port, server_machines in config.SERVERS.items():
        if crt_machine in server_machines:
            # This server is supposed to run here.
            continue

        response = port_talker.Talk("localhost", server_port, "v", 40)
        if not response[0]:
            # Nothing answered on that port; nothing to kill.
            continue

        server_to_kill = servertype.GetPortType(server_port)
        logging.info("kill service: %s", server_to_kill)
        servertype.KillServer(server_to_kill, config, crt_machine,
                              server_port)
Exemplo n.º 33
0
def KillRedundantServices(config):
  """Stop locally running servers that the server map assigns elsewhere.

  For each (port, machines) entry of config.SERVERS where the current host
  is not among the machines, localhost is probed on that port.  A truthy
  first element in the probe result triggers servertype.KillServer for the
  server type mapped to the port.
  """
  this_host = E.getCrtHostName()
  server_map = config.SERVERS

  for port, hosts in server_map.items():
    # The probe is only issued for ports that should not be served here.
    if this_host not in hosts and port_talker.Talk(
        "localhost", port, "v", 40)[0]:
      kind = servertype.GetPortType(port)
      logging.info("kill service: %s" % kind)
      servertype.KillServer(kind, config, this_host, port)
Exemplo n.º 34
0
  def init_service(self, ent_home):
    """
    Does the actual initialization. Loads the config file into the
    cp (EntConfig) member and caches frequently used parameters as
    attributes for easy access.

    ent_home: value handed to entconfig.EntConfig to locate the config.

    Exits the process via sys.exit if the config file cannot be loaded.
    """

    self.cp = entconfig.EntConfig(ent_home)
    if not self.cp.Load():
      sys.exit("Cannot load the config file %s" % self.cp.GetConfigFileName())

    # Get some params for easy access
    self.configfile    = self.cp.GetConfigFileName()
    self.version       = str(self.cp.var("VERSION"))
    # Tag built from the version and this service's name
    # (self.service_name must already be set by the time we get here).
    self.entid_tag     = "ENT_ID=%s_%s" % (self.version, self.service_name)
    self.ent_user      = self.cp.var("ENTERPRISE_USER")
    self.ent_group     = self.cp.var("ENTERPRISE_GROUP")
    self.ent_bashrc    = self.cp.var("ENTERPRISE_BASHRC")
    self.ent_home      = self.cp.var("ENTERPRISE_HOME")
    self.googlebot_dir = self.cp.var("GOOGLEBOT_DIR")
    self.version_tmpdir= "%s/tmp" % self.cp.var("ENTERPRISE_HOME")
    self.tmpdir        = self.cp.var("TMPDIR")
    self.logdir        = self.cp.var("LOGDIR")
    self.datadir       = self.cp.var("DATADIR")
    # Directories derived from ENTERPRISE_HOME.
    self.scripts_dir   = ("%s/local/google3/enterprise/legacy/scripts" %
                          self.ent_home)
    self.util_dir      = ("%s/local/google3/enterprise/legacy/util" %
                          self.ent_home)
    self.machines      = self.cp.var("MACHINES")

    # The master depends on the install state : for active / test / install
    # we have the adminrunner on the master, else we get it from MASTER
    # parameter
    self.install_state = install_utilities.install_state(
      self.version, rootdir = self.cp.var('ENT_DISK_ROOT'))
    self.local_machine  = E.getCrtHostName()
    # NOTE(review): testver is not used in the visible part of this method.
    testver = install_utilities.is_test(self.version)
    if self.install_state in ["ACTIVE", "TEST", "INSTALL"]:
      try:
        self.master_machine = find_master.FindMasterUsingChubby(self.cp.var('VERSION'))
      except core_utils.EntMasterError, e:
        # Something is seriously wrong.
        logging.error("ERROR: Couldn't determine master")
        # Assume we aren't master, so we can at least do inactivate
        self.master_machine = None
Exemplo n.º 35
0
  def linecount(self, clientName, virtualFile, grepString, fileArgs):
    """Return the count of lines in a virtual file that match grepString.

    The grep string is stripped and URL-quoted, the virtual file is
    materialized through makePhysicalFile, and the matching lines of the
    resulting file are counted with a grep | wc | awk pipeline that is
    handed to getCount together with the current host name.

    Raises:
      ar_exception.ARException: when makePhysicalFile returns nothing.
    """
    quoted_grep = urllib.quote_plus(grepString.strip())

    physical_path = self.makePhysicalFile(virtualFile, clientName,
                                          quoted_grep, fileArgs)
    if not physical_path:
      raise ar_exception.ARException((
        ar_exception.LOADPARAMS, M.ERR_INVALIDFILESPECIFICATION))

    # Physical file size, expressed as a line count.
    count_cmd = "grep -i -F -- %s %s | wc -l | awk '{print $1}'" % (
        commands.mkarg(quoted_grep), physical_path)
    return self.getCount(count_cmd, E.getCrtHostName())
Exemplo n.º 36
0
    def linecount(self, clientName, virtualFile, grepString, fileArgs):
        """Count the lines of a virtual file that match grepString.

        The search text is stripped and URL-quoted first.  The virtual file
        is then turned into a physical one via makePhysicalFile, and a
        grep | wc | awk pipeline over that file is passed to getCount along
        with the current host name.

        Raises:
          ar_exception.ARException: when no physical file could be made.
        """
        quoted_grep = urllib.quote_plus(grepString.strip())
        physical_path = self.makePhysicalFile(virtualFile, clientName,
                                              quoted_grep, fileArgs)

        if physical_path:
            # Shell-quote the pattern, then count the matching lines.
            shell_pattern = commands.mkarg(quoted_grep)
            pipeline = ("grep -i -F -- %s %s | wc -l | awk '{print $1}'" %
                        (shell_pattern, physical_path))
            return self.getCount(pipeline, E.getCrtHostName())

        raise ar_exception.ARException(
            (ar_exception.LOADPARAMS, M.ERR_INVALIDFILESPECIFICATION))
Exemplo n.º 37
0
  def get_log_per_module(self, filename, module_name):
    '''Get the onebox module specific logging information.

    Greps `filename` for lines containing "Query [" or "[<module_name>]",
    writes the matches to self.TMP_LOG, and returns that file's contents.
    On a CLUSTER configuration the log is read through fileutil (using the
    GFS_ALIASES global parameter); otherwise it is grepped directly.

    Args:
      filename: path of the onebox log to search.
      module_name: onebox module name to look for inside square brackets.

    Returns:
      The matching log lines as one string.

    The scratch file self.TMP_LOG is always closed and removed once it has
    been opened, even when reading it fails.
    '''
    ent_config_type = self.cfg.getGlobalParam('ENT_CONFIG_TYPE')
    # Shell-quoted grep pattern: either a query marker or this module's tag.
    grep_string = '\"Query \\[\\|\\[%s\\]\"' % module_name

    if ent_config_type == 'CLUSTER':
      gfs_aliases = self.cfg.getGlobalParam('GFS_ALIASES')
      grep_command = 'fileutil --bnsresolver_use_svelte=false '\
                     '--gfs_aliases=%s cat %s | grep %s > %s' % (
                      gfs_aliases, filename, grep_string, self.TMP_LOG)
    else:
      grep_command = 'grep %s %s > %s' % (grep_string, filename, self.TMP_LOG)

    machine = E.getCrtHostName()
    # NOTE(review): the status returned by E.execute is not checked; a failed
    # command surfaces only as an empty result or a failing open() below.
    E.execute([machine], grep_command, None, false)
    tmp_file = open(self.TMP_LOG, 'r')
    try:
      contents = tmp_file.read()
    finally:
      # Release the handle and drop the scratch file even if read() raised,
      # so a failed read cannot leak the descriptor or leave stale output.
      tmp_file.close()
      os.remove(self.TMP_LOG)
    return contents
Exemplo n.º 38
0
def main(argv):
  """Kick off yesterday's log report and apache log update on the master.

  Args:
    argv: [<value passed to entconfig.EntConfig>, <crawl name>,
           <adminrunner port>].

  Exits with the module usage text when the config cannot be loaded or the
  arguments are missing or malformed, exits with status 0 when this machine
  is not the (single) master, and exits with an error message when the log
  report cannot be started.
  """
  config = entconfig.EntConfig(argv[0])
  if not config.Load():
    sys.exit(__doc__)
  try:
    crawl = argv[1]
    port = string.atoi(argv[2])
  except (IndexError, ValueError):
    # Only a missing argument or a non-numeric port is a usage error; a bare
    # except here would also swallow KeyboardInterrupt and SystemExit.
    sys.exit(__doc__)

  machines = config.var('MACHINES')
  master = find_master.FindMaster(port, machines)

  # The name of this machine
  crt_machine = E.getCrtHostName()

  if len(master) != 1 or master[0] != crt_machine:
    logging.info("I am not the master")
    sys.exit(0) # not a problem

  # find out the date 24 hours ago
  year,month,day = time.localtime(time.time() - 60*60*24 )[0:3]
  # date1 is the underscore-joined form given to the adminrunner; date2 is
  # the space-separated form appended to the apache_log.py command line.
  date1 = "date_%d_%d_%d" % (month, day, year)
  date2 = "date %d %d %d" % (month, day, year)

  logging.info("Running %s for %s" % (sys.argv[0], date2))

  # 1. run log_report.py for each crawl
  ar = adminrunner_client.AdminRunnerClient("localhost", port)
  if not ar.LogReportStart(crawl, date1):
    sys.exit("Error starting crawl for " + crawl)
  logging.info("Started log_report for %s/%s" %(crawl, date1))

  # 2. run apache_log.py for each crawl
  apache_cmd = "./apache_log.py %s update %s %s" % (argv[0], crawl, date2)
  (status, output) = commands.getstatusoutput(apache_cmd)
  if status != 0:
    # NOTE(review): unless logging.fatal terminates the process, the
    # "Finished" line below is logged for failed runs too -- confirm.
    logging.fatal("apache_log failed: %s" % output)
  logging.info("Finished apache_log: %s" % output)

  logging.info("Done")
Exemplo n.º 39
0
def main(argv):
    """Kick off yesterday's log report and apache log update on the master.

    Args:
      argv: [<value passed to entconfig.EntConfig>, <crawl name>,
             <adminrunner port>].

    Exits with the module usage text when the config cannot be loaded or
    the arguments are missing or malformed, exits with status 0 when this
    machine is not the (single) master, and exits with an error message
    when the log report cannot be started.
    """
    config = entconfig.EntConfig(argv[0])
    if not config.Load():
        sys.exit(__doc__)
    try:
        crawl = argv[1]
        port = string.atoi(argv[2])
    except (IndexError, ValueError):
        # Only a missing argument or a non-numeric port is a usage error; a
        # bare except here would also swallow KeyboardInterrupt/SystemExit.
        sys.exit(__doc__)

    machines = config.var('MACHINES')
    master = find_master.FindMaster(port, machines)

    # The name of this machine
    crt_machine = E.getCrtHostName()

    if len(master) != 1 or master[0] != crt_machine:
        logging.info("I am not the master")
        sys.exit(0)  # not a problem

    # find out the date 24 hours ago
    year, month, day = time.localtime(time.time() - 60 * 60 * 24)[0:3]
    # date1 is the underscore-joined form given to the adminrunner; date2 is
    # the space-separated form appended to the apache_log.py command line.
    date1 = "date_%d_%d_%d" % (month, day, year)
    date2 = "date %d %d %d" % (month, day, year)

    logging.info("Running %s for %s" % (sys.argv[0], date2))

    # 1. run log_report.py for each crawl
    ar = adminrunner_client.AdminRunnerClient("localhost", port)
    if not ar.LogReportStart(crawl, date1):
        sys.exit("Error starting crawl for " + crawl)
    logging.info("Started log_report for %s/%s" % (crawl, date1))

    # 2. run apache_log.py for each crawl
    apache_cmd = "./apache_log.py %s update %s %s" % (argv[0], crawl, date2)
    (status, output) = commands.getstatusoutput(apache_cmd)
    if status != 0:
        # NOTE(review): unless logging.fatal terminates the process, the
        # "Finished" line below is logged for failed runs too -- confirm.
        logging.fatal("apache_log failed: %s" % output)
    logging.info("Finished apache_log: %s" % output)

    logging.info("Done")
Exemplo n.º 40
0
def StartupWork(cfg, state):
  """Perform the one-time and per-start initialization work.

  Steps, in order:
    1. Check machine memory (cfg.CheckMachineMemory); give up on failure.
    2. If the init state is C.FRESH, create the global default files, save
       and distribute the params, and move to C.CONFIG_FILES_INITIALIZED.
    3. If `state` is "INSTALL", mark startup as done and return.
    4. Resume an interrupted index reset, set MASTER to this host, and do
       machine allocation.
    5. If the init state is C.CONFIG_FILES_INITIALIZED, create datadirs, GFS
       subdirectories and the default collection/frontend/backend files,
       move to C.INITIALIZED and start the serve service; otherwise only
       babysit the serve service.
    6. Save the params and end startup mode.

  Args:
    cfg: configurator object offering the operations called below.
    state: install state string; only "INSTALL" is special-cased here.

  Any exception raised along the way is caught, logged with its traceback,
  and reported through logging.fatal.
  """

  try:
    #############################################################################
    # check memory-total
    logging.info("check memory-total")
    if not cfg.CheckMachineMemory():
      logging.fatal("failed to check memory-total")
      return

    # One-time Initialization Work:
    # In fresh mode we first initialize the global default
    if install_utilities.GetInitState(cfg.globalParams) == C.FRESH:
      logging.info("initializing global default files")
      if not cfg.globalParams.InitGlobalDefaultFiles(overwrite=false):
        logging.fatal("failed to initialize global default files")
        return
      # Call to saveParams will also distribute the newly created global
      # default files.
      logging.info("saving params and distributing global default files.")
      logging.flush()
      # This is a best effort mechanism. We try up to 6 times to update the
      # files. We assume that we'll get the files to all alive machines. If
      # a machine can't get the files then we assume that it is dead and won't
      # become a master with missing default files.
      cfg.saveParams(retry=6)
      logging.flush()
      install_utilities.SetInitState(cfg, C.CONFIG_FILES_INITIALIZED)
      logging.info('system initialization for %s state successful' %
          C.CONFIG_FILES_INITIALIZED)

    logging.flush()
    # In install mode -- bail out now
    if state == "INSTALL":
      cfg.startupDone()
      return

    # Perform reset index if we got interrupted in the middle of it
    reset_index.ResetIndex(cfg, only_if_in_progress=1)

    # Record this host as the MASTER in the global parameters.
    try:
      localhost =  E.getCrtHostName()
      if not cfg.setGlobalParam('MASTER', localhost):
        logging.fatal("Error setting the MASTER to %s" % localhost)
    except IOError:
      logging.fatal("Couldn't set MASTER -> exiting")

    # Allocate machines for the empty slots
    logging.info("doing machine allocation")
    if not cfg.DoMachineAllocation():
      # An allocation failure is only logged; startup continues.
      logging.info("failed to do machine allocation")

    # In the next install stage (INSTALL) mode we go and finish up the work
    if (install_utilities.GetInitState(cfg.globalParams) ==
          C.CONFIG_FILES_INITIALIZED):
      logging.info("doing system initialization")

      # datadirs are initially made (by the base rpm) to not have any data disks
      # because it doesn't know which disks to use, but the datadir is needed
      # to get things running.
      logging.info("creating datadirs")
      if not cfg.createDataDirs(cfg.getGlobalParam(C.MACHINES)):
        logging.fatal("failed to make datadirs on machines")
        return

      # Create GFS subdirectories
      if cfg.getGlobalParam(C.GFS_CELL):
        logging.info("creating gfs subdirectories")
        if not cfg.createGFSChunkSubDirs():
          logging.fatal("failed creating the gfs subdirectories")
          return

      # ensure initial spelling data is present
      cfg.EnsureSpellingData()

      # create a default collection w/content of mainCrawl / frontend
      logging.info("Creating default collection ...")
      cfg.CreateDefaultCollection()
      logging.info("Creating default frontend ...")
      cfg.CreateDefaultFrontend()
      logging.info("Assiging default collection and frontend ...")
      cfg.AssignDefaultCollAndFront()

      # create some initial files needed by various backend
      logging.info("Creating default backend files ...")
      cfg.CreateDefaultBackendFiles()

      # create built in query expansion entry
      logging.info("Creating default query expansion entry ...")
      cfg.CreateDefaultQueryExpEntry()

      install_utilities.SetInitState(cfg, C.INITIALIZED)
      logging.info("system initialization successful")
      # we start the serve_service as this is fresh install
      logging.info('Starting serve service...')
      RunServeService(cfg.globalParams, 'start')
    else:
      # we restart the babysitter because the servers map may have changed
      logging.info('Babysitting serve service after allocation...')
      RunServeService(cfg.globalParams, 'babysit')

    logging.info("Saving params")
    cfg.saveParams()

    # now we're ready to run; end startup mode
    logging.info("Done with startup")
    cfg.startupDone()

  except Exception, e:
    # Log the full traceback first, then report the failure as fatal.
    (t, v, tb) = sys.exc_info()
    exc_msg = string.join(traceback.format_exception(t, v, tb))
    logging.error(exc_msg)
    logging.fatal("StartupWork failed with exception: %s; %s" % (
      e, traceback.format_tb(tb)))
Exemplo n.º 41
0
    def makePhysicalFile(self, virtualFile, clientName, grepString, fileArg):
        """
        Makes a physical file from a virtual one.

        Builds a shell pipeline (cat/tac/tail, grep, cut) over the input
        file(s) of the virtual file and redirects its output to a temp file.

        Args:
          virtualFile: key into the global FILE_TABLE.
          clientName, fileArg: passed to the table entry to compute paths;
            fileArg is first stripped to letters, digits and '_-%'.
          grepString: optional fixed string used to filter lines
            (case-insensitive).

        Returns:
          The path of the generated output file; the input path itself when
          no processing command is needed; None when virtualFile is unknown
          or a required copy from GFS fails.
        """

        # Sanitize fileArg.
        fileArg = ''.join([
            x for x in fileArg
            if x in string.ascii_letters + string.digits + '_-%'
        ])

        # Translate from String to fileId
        if not virtualFile or not FILE_TABLE.has_key(virtualFile):
            return None

        # For each file that we can export we have to have an entry in the
        # global FILE_TABLE
        fe = FILE_TABLE[virtualFile]

        pathIn = fe.getPathIn(self.cfg.globalParams, clientName, fileArg,
                              grepString)
        pathOut = fe.getPathOut(self.cfg.globalParams, clientName, fileArg,
                                grepString)
        auxGrepString = fe.aux_grep
        auxCutString = fe.aux_cut
        machine = E.getCrtHostName()

        # Set only when a scratch copy is made; it is removed at the end.
        tmpPath = None

        # Copy web log from GFS to the log directory if necessary.
        if virtualFile == 'WEB_LOG' and self.cfg.getGlobalParam('GFS_ALIASES') and \
            not os.path.exists(pathIn):
            ok = self.cfg.logmanager.CopyRawReportFromGfsToLocal(
                fileArg, clientName)
            if not ok or not os.path.exists(pathIn):
                logging.error('Failed on CopyRawReportFromGfsToLocal()')
                return None

        # Copy the feed log from GFS to the log directory if necessary.
        elif virtualFile == 'FEED_LOG' and self.cfg.getGlobalParam('GFS_ALIASES') and \
             not os.path.exists(pathIn):
            tmpPath = pathOut + "_fromGFS"
            (status, output) = E.run_fileutil_command(
                self.cfg.globalParams, "cat %s > %s" % (pathIn, tmpPath), 5)
            if E.ERR_OK != status:
                logging.error("Failed to copy %s to %s" % (pathIn, tmpPath))
                return None
            # From here on, read from the scratch copy.
            pathIn = tmpPath

        # Count the files that we can get
        files = E.ls([machine], pathIn)
        if not files:
            numFiles = 0
        else:
            numFiles = len(files)

        # If we need only one file, and there are more than one, use the
        # last one.
        if numFiles > 0 and not fe.multi_files:
            pathIn = files[-1]

        # Create the auxiliary command to create the actual file
        command = None
        if numFiles == 0:
            # No files available: produce an output file holding one empty line.
            command = "echo -e '' > %s" % pathOut
        else:
            if virtualFile == 'FEED_LOG':
                # Skip the first line of the feed log.
                command = "tail -n +2 %s " % pathIn
                if fe.do_tac:
                    command = command + " | tac "
            # If we reverse the files before displaying
            elif fe.do_tac:
                command = "tac `ls -r %s` " % pathIn

            # Grep the lines we want
            if auxGrepString:
                if not command:
                    command = "cat `ls -r %s`" % pathIn
                command = command + " | grep -- %s" % commands.mkarg(
                    auxGrepString)

            # Maybe we need another grep
            if grepString:
                if not command:
                    command = "cat `ls -r %s`" % pathIn
                parsedGrepString = commands.mkarg(grepString)
                command = command + " | grep -i -F -- %s" % parsedGrepString

            # Maybe a cut as well
            if auxCutString:
                if not command:
                    command = "cat `ls -r %s`" % pathIn
                command = command + " | cut %s" % auxCutString

            if command:
                command = command + " > " + pathOut

            # execute the command  "file+operation > temp_filename"
            # if the command is null just use the path we have.
            if not command:
                return pathIn

        # NOTE(review): the status returned by E.execute is not checked.
        E.execute([machine], command, None, false)
        if tmpPath:
            E.rm([machine], tmpPath)

        return pathOut
#!/usr/bin/python2.4
#
# This handy command reads global config file on the current machine and
# writes the value of CONFIGVERSION parameter together with the machine name.
# (Used by periodic_script.py)
#
###############################################################################
"""
Usage :
  get_config_version.py <enthome>
"""

import sys
from google3.enterprise.legacy.util import E
from google3.enterprise.legacy.adminrunner import entconfig

if __name__ == '__main__':
    config = entconfig.EntConfig(sys.argv[1])
    if not config.Load():
        version = ""
    else:
        version = config.var("CONFIGVERSION")

    print "%s-%s" % (version, E.getCrtHostName())

###############################################################################
Exemplo n.º 43
0
def main(argv):
  """Replicate configuration files and directories from another machine.

  Expects argv[0] to be a machine name and argv[1] a google_config file.
  - exits immediately when `machine` is the current host;
  - copies a fixed list of files with CopyFile(), and the apache certificate
    and private key with CopyFileAsRoot() (per the original description, the
    copy goes from the given machine to the local one);
  - replicates the conf and gws directories with ReplicateDirectory();
  - unless ENT_CONFIG_TYPE is 'PILE', re-runs the NTP and DNS reconfigure
    commands.

  Exits with the module usage text when fewer than two arguments are given,
  and with an error message when the config file cannot be read.
  """
  if len(argv) < 2:
    sys.exit(__doc__)

  machine = argv[0]
  global_file = argv[1]
  global_dir = os.path.dirname(global_file)

  if E.getCrtHostName() == machine:
    logging.info("I do not try to replicate to myself")
    sys.exit(0)

  config = config_factory.ConfigFactory().CreateConfig(global_file)
  if not config.Load():
    sys.exit("ERROR: Cannot read file %s " % global_file)

  # Individual files kept in sync; most live next to the global config file.
  file_to_copy = [ C.ETC_SYSCONFIG,
                   global_file,
                   "%s/lic_counter" % global_dir,
                   "%s/lic_counter.bck" % global_dir,
                   "%s/passwd" % global_dir,
                   "/export/hda3/versionmanager/vmanager_passwd",
                   "%s/server.p12" % global_dir
                   ]
  # Sync all the files
  for f in file_to_copy:
    CopyFile(machine, f, production_tmp)

  # sync apache certificate
  CopyFileAsRoot(machine, (ssl_cert.CERT_FILENAME % config.var('ENTERPRISE_HOME')),
                 production_tmp, config)

  # sync private key
  CopyFileAsRoot(machine, (ssl_cert.KEY_FILENAME % config.var('ENTERPRISE_HOME')),
                 production_tmp, config)

  # sync the support request repository directory
  # GRR: ReplicateDirectory(machine, SUPPORTREQUEST_RECORDS_DIR)

  # replicate config including per collection/frontend stuff
  # some dirs in conf don't need to be rsync'ed
  if core_utils.CanRunGSAMaster(E.getCrtHostName()):
    exclude_patterns = ['cmr_working/failure/', 'cmr_working/success/',
                        'cmr_working/statusz/']
  else:
    exclude_patterns = ['cmr_working/', 'cmr/', 'fixer/', 'fixer_cmr/', 'gems']
  ReplicateDirectory(machine, "%s/local/conf/" % config.var('ENTERPRISE_HOME'),
                     exclude_patterns)

  # to replicate per frontend gws stuff (keymatches, gws bad urls)
  ReplicateDirectory(machine, "%s/gws/" % config.var('GOOGLEDATA'))

  # need to adjust NTP if master has changed
  # assumption : this machine is not the master
  if 'PILE' != config.var('ENT_CONFIG_TYPE'):
    ent_home = E.getEnterpriseHome()

    # NOTE(review): the exit status of both os.system calls is ignored and
    # the config values are interpolated into the shell command unquoted.
    os.system("%s NTP %s" % (C.RECONFIGURE_COMMAND % ent_home, machine))

    # also make sure DNS is up to date
    os.system("%s DNS %s %s" % (
      C.RECONFIGURE_COMMAND % ent_home,
      config.var('BOT_DNS_SERVERS'),
      config.var('DNS_SEARCH_PATH')))
def doReconfigureNet(config, machines=None, i_am_master=1,
                     force_ntp_reconfig=0):
  """Reconfigure serveriron, DNS, NTP, iptables and timezone as needed.

  Args:
    config: object whose var() method yields the configuration values.
    machines: machines to run the reconfigure commands on; defaults to the
      MACHINES config value when empty or None.
    i_am_master: when true, NTP is reconfigured as well.
    force_ntp_reconfig: passed through to the NTP reconfigure command;
      1 forces NTP server reconfiguration.

  Returns:
    True when every reconfigure step returned 0 (no change) or 1 (changed),
    False when any step returned 2 or more (error).

  Raises:
    Exception: when EXTERNAL_WEB_IP, USE_DHCP or DNS_DHCP is missing from
      the config.
  """

  # first we need to reconfigure our external IP address
  # Steps that are skipped keep the initial 0, which counts as "no change".
  ret1 = ret2 = ret4 = ret5 = ret6 = ret8 = 0

  configType = config.var('ENT_CONFIG_TYPE')
  ent_home = E.getEnterpriseHome()
  if not machines:
    machines = config.var('MACHINES')

  # Sanity check
  if not config.var('EXTERNAL_WEB_IP'):
    logging.error('config file is missing EXTERNAL_WEB_IP, probably corrupt')
    raise Exception, 'config file is missing EXTERNAL_WEB_IP, probably corrupt'

  if config.var('USE_DHCP') is None:
    logging.error('config file is missing USE_DHCP, probably corrupt')
    raise Exception, 'config file is missing USE_DHCP, probably corrupt'

  if config.var('DNS_DHCP') is None:
    logging.error('config file is missing DNS_DHCP, probably corrupt')
    raise Exception, 'config file is missing DNS_DHCP, probably corrupt'

  tries = 3 # default for CLUSTER and PILE configuration
  if configType in ["SUPER", "ONEBOX", "MINI", "LITE", "FULL"]:
    tries = 1
    # reconfigure eth0 to dhcp mode
    if config.var('USE_DHCP') == 1:
      ret1 = ExecuteWrapper(
        machines,
        "%s LOCALNET DHCP %s" % (
        C.RECONFIGURE_COMMAND % ent_home,
        config.var('ONEBOX_NETCARD_SETTINGS'),
        ), None, 600, num_tries = tries)
      logging.info("reconfigure eth0 IP to dhcp mode: %s" % ret1)
    else:
      # Static addressing: pass IP, netmask and default route explicitly.
      ret1 = ExecuteWrapper(
        machines,
        "%s LOCALNET %s %s %s %s" % (
        C.RECONFIGURE_COMMAND % ent_home,
        config.var('EXTERNAL_WEB_IP'),
        config.var('EXTERNAL_NETMASK'),
        config.var('EXTERNAL_DEFAULT_ROUTE'),
        config.var('ONEBOX_NETCARD_SETTINGS'),
        ), None, 600, num_tries = tries)
      logging.info("reconfigure eth0 IP address: %s" % ret1)
  elif "CLUSTER" == configType:
    # reconfigure serveriron IP address
    cmd = "(sleep 5; %s SERVERIRON %s %s %s %s %s %s %s %s %s >&/dev/null " \
          "</dev/null)" % (
      C.RECONFIGURE_COMMAND % ent_home,
      "%s/local/conf/defaults/" % ent_home,
      commands.mkarg(config.var("EXTERNAL_SWITCH_IP")),
      commands.mkarg(config.var("EXTERNAL_WEB_IP")),
      commands.mkarg(config.var("EXTERNAL_CRAWL_IP")),
      commands.mkarg(config.var("EXTERNAL_NETMASK")),
      commands.mkarg(config.var("EXTERNAL_DEFAULT_ROUTE")),
      commands.mkarg(str(config.var("EXTERNAL_WEB_PORT"))),
      commands.mkarg(str(config.var("SERVERIRON_AUTONEGOTIATION"))),
      commands.mkarg(str(config.var("ENT_ENABLE_EXTERNAL_SSH"))),
      )
    # The command runs on this host in a separate thread; its result is not
    # collected, and the log line below is written as soon as the thread has
    # been started.
    t = threading.Thread(target=E.execute,
                         args=([E.getCrtHostName()], cmd, None, 60))
    t.start()
    logging.info("serveriron IP address reconfigured")


  # we don't want to change ANYTHING on PILE clusters
  if "PILE" != configType:
    # DNS needs to be set everywhere
    # A literal pair of double quotes stands in for an unset value.
    dns_server = "\"\""
    dns_searchpath = "\"\""
    if config.var('BOT_DNS_SERVERS') != None:
      dns_server = config.var('BOT_DNS_SERVERS')
    if config.var('DNS_SEARCH_PATH') != None:
      dns_searchpath = config.var('DNS_SEARCH_PATH')
    if config.var('DNS_DHCP') != 1:
      ret8 = ExecuteWrapper(
          machines,
          "%s DNSMODE STATIC" % (
          C.RECONFIGURE_COMMAND % ent_home
          ),  None, 600, num_tries = tries)
      logging.info("setting DNS mode to STATIC: %s" % ret8)
      ret2 = ExecuteWrapper(
        machines,
        "%s DNS %s %s" % (
        C.RECONFIGURE_COMMAND % ent_home,
        commands.mkarg(dns_server),
        commands.mkarg(dns_searchpath),
        ), None, 600, num_tries = tries)
      logging.info("reconfigure DNS: %s" % ret2)
    else:
      ret8 = ExecuteWrapper(
          machines,
          "%s DNSMODE DHCP" % (
          C.RECONFIGURE_COMMAND % ent_home
          ),  None, 600, num_tries = tries)
      logging.info("setting DNS mode to DHCP: %s" % ret8)

    # NTP is special: all machines but the master must set their
    # NTP server to the master, the master to the external one.
    # However, It can take 3 minutes for the stratum level of the master
    # node to be set. Before that, non-master nodes using the master
    # node to do "ntpdate" may return "no server suitable for synchronization
    # found" error.
    # To fix the problem, we just use the external ntp servers for all nodes.
    # Later, periodic script will set the non-master nodes to use the master
    # node. (periodic script will only set it once as long as master does
    # not switch)
    ntpServers = "\"\""
    if config.var('NTP_SERVERS') != None:
      # Work on a copy so the configured list itself is not modified.
      ntp_server_list = copy.copy(config.var('NTP_SERVERS'))
      AddDefaultNTPServer(ntp_server_list)
      ntpServers = string.join(ntp_server_list, ",")

    if i_am_master:
      ret4 = ExecuteWrapper(
        machines,
        "%s NTP %d %s" % (
        C.RECONFIGURE_COMMAND % ent_home,
        force_ntp_reconfig,
        commands.mkarg(ntpServers)),
        None, 600, num_tries = tries)
      logging.info("reconfigure NTP: %s" % ret4)

  # whenever we print dates, we want to include the timezone. Since
  # the timezone may change on the machine (through config interface),
  # and java VM does not pick this change up automatically,
  # we keep track of the timezone here
  ret5 = ExecuteWrapper(
    machines,
    "%s TIMEZONE %s" % (
    C.RECONFIGURE_COMMAND % ent_home,
    commands.mkarg(config.var('TIMEZONE'))),
    None, 600, num_tries = tries)
  logging.info("reconfigure TIMEZONE: %s" % ret5)

  # iptables
  # Pick the rules file that matches the configuration type.
  if configType in ["SUPER", "ONEBOX", "LITE", "FULL"]:
    iptables_file = "%s/local/conf/defaults/iptables.onebox" % ent_home
  elif "MINI" == configType:
    iptables_file = "%s/local/conf/defaults/iptables.mini" % ent_home
  elif "CLUSTER" == configType:
    iptables_file = "%s/local/conf/defaults/iptables.cluster" % ent_home
  else:
    iptables_file = "%s/local/conf/defaults/iptables.open" % ent_home
  ret6 = ExecuteWrapper(
    machines,
    "%s IPTABLES %s %s %s" % (
    C.RECONFIGURE_COMMAND % ent_home,
    iptables_file,
    commands.mkarg(str(config.var("ENT_ENABLE_EXTERNAL_SSH"))),
    commands.mkarg(str(config.var("ENT_ENABLE_EXTERNAL_BORGMON"))),
    ), None, 600, num_tries = tries)
  logging.info("reconfigure IPTABLES: %s" % ret6)

  # return value from each reconfigurenet call is:
  # 0 : no change
  # 1 : changed
  # 2+: error (but we report no errors)

  # _our_ return value is true only when none of them failed
  ret = (ret1 <= 1 and ret2 <= 1 and ret4 <= 1 and
         ret5 <= 1 and ret6 <= 1 and ret8 <= 1)
  return ret
Exemplo n.º 45
0
    def add(self, machine, apc_outlet):
        """
        Adds a machine to the configuration.

        Only allowed while the install state is "ACTIVE".  The procedure
        checks that the machine is reachable, starts the svs on it, updates
        MACHINES, registers its good disks and APC outlet, creates datadirs,
        replicates the config, reconfigures its network, starts core
        services, reallocates servers onto it, activates it and starts the
        crawl service there.

        Args:
          machine: name of the machine to add.
          apc_outlet: value passed to apc_util.PortMap for the APC_MAP entry.

        Returns:
          1 as soon as any mandatory step fails.
          NOTE(review): no explicit success return is visible in this part
          of the method.
        """
        # We can add a machine only when we are in active state
        if install_utilities.install_state(
                self.cfg.getGlobalParam('VERSION')) != "ACTIVE":
            logging.error("Can add a machine only when we are in active state")
            return 1

        # First test for accessibility of the machine.
        if E.execute([machine], 'echo 1', None, 1) != E.ERR_OK:
            logging.error("Could not ssh into the machine %s" % machine)
            return 1

        # start the svs on the remote machine
        # (svs_utilities.py is run on this host through the secure wrapper,
        # with the target machine as its argument)
        restart_svs_cmd = "%s/local/google3/enterprise/legacy/util/svs_utilities.py %s %s" % (
            self.cfg.getGlobalParam('ENTERPRISE_HOME'),
            self.cfg.getGlobalParam('ENTERPRISE_HOME'), machine)
        if E.execute([E.getCrtHostName()],
                         SECURE_WRAPPER_COMMAND % ( \
                              self.cfg.getGlobalParam('ENTERPRISE_HOME'),
                              "-p2",
                              restart_svs_cmd),
                         None, 0) != E.ERR_OK:
            logging.error("Could not start svs on machine %s" % machine)
            return 1

        # wait for some time for svs to come up
        time.sleep(5)
        # check to see if the svs is up and is the right version
        if not svs_utilities.PingAndCheckSvsVersion(
                self.cfg.getGlobalParam('ENTERPRISE_BASHRC'),
                self.cfg.getGlobalParam('ENTERPRISE_HOME'), machine):
            logging.error("Svs not running correctly on machine %s" % machine)
            return 1
        ver = self.cfg.getGlobalParam('VERSION')
        home = self.cfg.getGlobalParam('ENTERPRISE_HOME')
        testver = install_utilities.is_test(ver)

        # update MACHINES
        machines = self.cfg.getGlobalParam('MACHINES')
        if machine not in machines:
            machines.append(machine)
        self.cfg.setGlobalParam('MACHINES', machines)

        ret = core_utils.RemDeadNode(ver, testver, machine)
        if ret:
            logging.error('Cannot remove dead node from lockserver.')
            # we ignore this error for now

        # We just added a new machine into the config
        # this will lead to a change in concentrator config
        # so we need to re-run serve service which will
        # write the new config and restart the concentrator
        serve_cmd = ". %s && cd %s/local/google3/enterprise/legacy/scripts && " \
                          "./serve_service.py %s" % (
          self.cfg.getGlobalParam('ENTERPRISE_BASHRC'),
          self.cfg.getGlobalParam('ENTERPRISE_HOME'),
          self.cfg.getGlobalParam('ENTERPRISE_HOME'))
        E.exe("%s %s" % (serve_cmd, "babysit"))

        # Poll for the new machine's disk facts, up to num_tries times with a
        # 60 second pause after each unsuccessful attempt.
        num_tries = 5
        cur_try = 0
        while cur_try < num_tries:
            cur_try = cur_try + 1
            all_disks = self.cfg.mach_param_cache.GetFact(
                "mounted-drives", machine)
            bad_disks = self.cfg.mach_param_cache.GetFact(
                "var_log_badhds", machine)
            if bad_disks and all_disks:
                break
            time.sleep(60)
        # NOTE(review): the loop only stops early when both facts are truthy,
        # while this check rejects only None -- an empty fact exhausts all
        # retries and is then accepted.
        if all_disks == None or bad_disks == None:
            logging.error("Could not get machine information about %s" %
                          machine)
            return 1

        # Good disks are the mounted drives not listed as bad, with the
        # partition suffix "3" appended.
        bad_disks = string.split(bad_disks, ' ')
        all_disks = string.split(all_disks, ' ')
        good_disks = filter(lambda x, y=bad_disks: x not in y, all_disks)
        good_disks = map(lambda x: "%s3" % x, good_disks)
        # change sda3 to hda3 etc.
        good_disks = map(lambda x: re.sub(r'^s', 'h', x), good_disks)

        # Preprocess disks before adding to remove duplicates.
        # (the list comprehension is used only for its append side effect;
        # first-seen order is kept)
        unique_good_disks = []
        [
            unique_good_disks.append(disk) for disk in good_disks
            if disk not in unique_good_disks
        ]

        # Add disks
        self.updatedisk(machine, unique_good_disks, true)

        # apc map update
        apc_map = self.cfg.globalParams.var_copy('APC_MAP')
        apc_map[machine] = apc_util.PortMap(apc_outlet)
        if not self.cfg.setGlobalParam('APC_MAP', apc_map):
            logging.error("ERROR setting apc map to %s" % repr(apc_map))
            return 1

        # create appropriate datadirs on that machine
        if not self.cfg.createDataDirs([machine], node_replacement=1):
            logging.error("ERROR could not create datadirs on machine %s" %
                          machine)
            return 1

        # Replicate the config
        self.cfg.replicateConfigOnMachine(machine)

        # Reconfigure net on the target machine
        if not reconfigurenet_util.doReconfigureNet(
                self.cfg.globalParams, [machine], i_am_master=0):
            logging.error('reconfigurenet failed for %s' % machine)
            return 1

        # Start core services on the new node
        if not install_utilities.start_core(ver, home, [machine], ignore=0):
            logging.error("ERROR could not start core services on %s" %
                          machine)
            return 1
        # Add the chunkserver back
        gfs_utils.AddGFSChunkservers(ver, testver, [machine])

        # first we need to do Machine allocation.
        # this will assign things that will satisfy the constraints
        if not self.cfg.DoMachineAllocation(serversets=['workqueue-slave']):
            logging.error("ERROR doing machine allocation")
            return 1

        # now try to reallocate some servers from existing machines to the new machine
        replaced = self.cfg.AllocateServersToNewMachine(machine)
        if not replaced:
            logging.error("ERROR allocating services to the new machine")
            return 1

        # first we need to restart the babysitter
        E.exe("%s %s" % (serve_cmd, "babysit"))
        time.sleep(60)

        # Now we need to stop all the replaced services
        # (a failed kill is logged but does not abort the procedure)
        for server_string in replaced:
            server = serverlib.Server()
            server.InitFromName(server_string)
            replaced_type = server.servertype()
            kill_cmd = servertype.GetKillCmd(replaced_type, server.port())
            if E.execute([server.host()], kill_cmd, None, 1) != E.ERR_OK:
                logging.error("ERROR killing %s running on port %d on %s" % \
                                     (replaced_type, server.port(), server.host()))

        # we should make it active
        if not install_utilities.set_install_state(
                machine, self.cfg.getGlobalParam('ENTERPRISE_HOME'), "ACTIVE"):
            logging.error("ERROR changing state on machine %s. "
                          "Please make it active and activate and "
                          "start crawl service on it" % machine)
            return 1

        crawl_cmd = ". %s && cd %s/local/google3/enterprise/legacy/scripts && " \
                          "./crawl_service.py %s" % (
          self.cfg.getGlobalParam('ENTERPRISE_BASHRC'),
          self.cfg.getGlobalParam('ENTERPRISE_HOME'),
          self.cfg.getGlobalParam('ENTERPRISE_HOME'))
        if E.execute([machine], "%s %s" %
                     (crawl_cmd, "start"), None, 1) != E.ERR_OK:
            logging.error("Could not start crawl service on %s" % machine)
            return 1

        # save all the params
        self.cfg.saveParams()

        # for faster crawl recovery, lets restart all crawl processes
        self.restart_crawl_processes(serve_cmd)

        # activate the crawl and logcontrol service on the remote machine
        crawl_activate_cmd = "/etc/rc.d/init.d/crawl_%s activate >&/dev/null" \
                               "</dev/null" % self.cfg.getGlobalParam('VERSION')
        if E.execute([machine], SECURE_WRAPPER_COMMAND % ( \
                                self.cfg.getGlobalParam('ENTERPRISE_HOME'),
                                "-e",
                                crawl_activate_cmd),
                         None, 0) != E.ERR_OK:
            logging.error("Could not activate crawl service on machine %s" %
                          machine)
            logging.error("Please activate by hand")
            return 1
        log_activate_cmd = "/etc/rc.d/init.d/logcontrol_%s activate >&/dev/null" \
                               "</dev/null" % self.cfg.getGlobalParam('VERSION')
        if E.execute([machine], SECURE_WRAPPER_COMMAND % ( \
                                self.cfg.getGlobalParam('ENTERPRISE_HOME'),
                                "-e",
                               log_activate_cmd),
                         None, 0) != E.ERR_OK:
            logging.error(
                "Could not activate logcontrol service on machine %s" %
                machine)
            logging.error("Please activate by hand")
            return 1

        serve_activate_cmd = "/etc/rc.d/init.d/serve_%s activate >&/dev/null" \
                               "</dev/null" % self.cfg.getGlobalParam('VERSION')
        if E.execute([machine], SECURE_WRAPPER_COMMAND % ( \
                                self.cfg.getGlobalParam('ENTERPRISE_HOME'),
                                "-e",
                               serve_activate_cmd),
                         None, 0) != E.ERR_OK:
            logging.error("Could not activate serve service on machine %s" %
                          machine)
            logging.error("Please activate by hand")
            return 1

        logging.info("Machine %s successfully added into the system" % machine)

        if not mail_already_sent(M.MSG_MACHINEADDED % machine):
            SendMail.send(self.cfg, None, false, M.MSG_MACHINEADDED % machine,
                          "", true)
        return 0
Exemplo n.º 46
0
  def add(self, machine, apc_outlet):
    """Add a machine to the configuration and bring its services up.

    The steps run in a fixed order and the method returns at the first fatal
    failure; steps that already ran are left in place (there is no rollback):

      1. Require this install to be ACTIVE and the machine to be reachable.
      2. Start svs for the machine and check it answers with the right
         version.
      3. Append the machine to MACHINES and call core_utils.RemDeadNode for
         it (a failure there is only logged).
      4. Re-run the serve service babysitter so the new config is written.
      5. Read the machine's disk facts and register its usable disks.
      6. Update APC_MAP, create the data dirs, replicate the config,
         reconfigure the network, start core services and add the GFS
         chunkserver.
      7. Allocate servers to the machine, restart the babysitter and kill
         the servers that were moved (kill failures are only logged).
      8. Mark the machine ACTIVE, start the crawl service, save the params,
         restart the crawl processes and activate the crawl, logcontrol and
         serve init scripts.
      9. Send the "machine added" mail unless it was already sent.

    Args:
      machine: name of the host to add.
      apc_outlet: value passed to apc_util.PortMap() to build the machine's
        APC_MAP entry.

    Returns:
      0 on success, 1 on the first fatal failure (the reason is logged).
    """
    # We can add a machine only when we are in active state
    ver = self.cfg.getGlobalParam('VERSION')
    if install_utilities.install_state(ver) != "ACTIVE":
      logging.error("Can add a machine only when we are in active state")
      return 1

    # First test for accessibility of the machine.
    if E.execute([machine], 'echo 1', None, 1) != E.ERR_OK:
      logging.error("Could not ssh into the machine %s" % machine)
      return 1

    home = self.cfg.getGlobalParam('ENTERPRISE_HOME')

    # start the svs on the remote machine
    restart_svs_cmd = (
        "%s/local/google3/enterprise/legacy/util/svs_utilities.py %s %s" % (
            home, home, machine))
    if E.execute([E.getCrtHostName()],
                 SECURE_WRAPPER_COMMAND % (home, "-p2", restart_svs_cmd),
                 None, 0) != E.ERR_OK:
      logging.error("Could not start svs on machine %s" % machine)
      return 1

    # wait for some time for svs to come up
    time.sleep(5)
    # check to see if the svs is up and is the right version
    bashrc = self.cfg.getGlobalParam('ENTERPRISE_BASHRC')
    if not svs_utilities.PingAndCheckSvsVersion(bashrc, home, machine):
      logging.error("Svs not running correctly on machine %s" % machine)
      return 1
    testver = install_utilities.is_test(ver)

    # update MACHINES
    machines = self.cfg.getGlobalParam('MACHINES')
    if machine not in machines:
      machines.append(machine)
    self.cfg.setGlobalParam('MACHINES', machines)

    if core_utils.RemDeadNode(ver, testver, machine):
      # we ignore this error for now
      logging.error('Cannot remove dead node from lockserver.')

    # We just added a new machine into the config
    # this will lead to a change in concentrator config
    # so we need to re-run serve service which will
    # write the new config and restart the concentrator
    serve_cmd = (". %s && cd %s/local/google3/enterprise/legacy/scripts && "
                 "./serve_service.py %s" % (bashrc, home, home))
    E.exe("%s %s" % (serve_cmd, "babysit"))

    # Poll the machine facts until both are non-empty, waiting a minute
    # after every unsuccessful attempt.
    # NOTE(review): an empty (but not None) fact also keeps the loop
    # waiting -- confirm whether an empty bad-disk fact is a valid answer.
    num_tries = 5
    for _ in range(num_tries):
      all_disks = self.cfg.mach_param_cache.GetFact("mounted-drives", machine)
      bad_disks = self.cfg.mach_param_cache.GetFact("var_log_badhds", machine)
      if bad_disks and all_disks:
        break
      time.sleep(60)
    if all_disks is None or bad_disks is None:
      logging.error("Could not get machine information about %s" % machine)
      return 1

    # Build the ordered, duplicate-free list of usable partitions: every
    # mounted drive that is not reported bad, with the partition number 3
    # appended.
    bad_names = bad_disks.split(' ')
    unique_good_disks = []
    for disk in all_disks.split(' '):
      # Empty tokens (empty fact, doubled or trailing spaces) are not disks;
      # without this guard they would turn into a bogus partition "3".
      if not disk or disk in bad_names:
        continue
      # change sda3 to hda3 etc.
      partition = re.sub(r'^s', 'h', "%s3" % disk)
      if partition not in unique_good_disks:
        unique_good_disks.append(partition)

    # Add disks
    self.updatedisk(machine, unique_good_disks, true)

    # apc map update
    apc_map = self.cfg.globalParams.var_copy('APC_MAP')
    apc_map[machine] = apc_util.PortMap(apc_outlet)
    if not self.cfg.setGlobalParam('APC_MAP', apc_map):
      logging.error("ERROR setting apc map to %s" % repr(apc_map))
      return 1

    # create appropriate datadirs on that machine
    if not self.cfg.createDataDirs([machine], node_replacement=1):
      logging.error("ERROR could not create datadirs on machine %s" % machine)
      return 1

    # Replicate the config
    self.cfg.replicateConfigOnMachine(machine)

    # Reconfigure net on the target machine
    if not reconfigurenet_util.doReconfigureNet(self.cfg.globalParams,
                                                [machine], i_am_master=0):
      logging.error('reconfigurenet failed for %s' % machine)
      return 1

    # Start core services on the new node
    if not install_utilities.start_core(ver, home, [machine], ignore=0):
      logging.error("ERROR could not start core services on %s" % machine)
      return 1
    # Add the chunkserver back
    gfs_utils.AddGFSChunkservers(ver, testver, [machine])

    # first we need to do Machine allocation.
    # this will assign things that will satisfy the constraints
    if not self.cfg.DoMachineAllocation(serversets=['workqueue-slave']):
      logging.error("ERROR doing machine allocation")
      return 1

    # now try to reallocate some servers from existing machines to the new
    # machine
    replaced = self.cfg.AllocateServersToNewMachine(machine)
    if not replaced:
      logging.error("ERROR allocating services to the new machine")
      return 1

    # first we need to restart the babysitter
    E.exe("%s %s" % (serve_cmd, "babysit"))
    time.sleep(60)

    # Now we need to stop all the replaced services; a failed kill is
    # logged but does not abort the addition.
    for server_string in replaced:
      server = serverlib.Server()
      server.InitFromName(server_string)
      replaced_type = server.servertype()
      kill_cmd = servertype.GetKillCmd(replaced_type, server.port())
      if E.execute([server.host()], kill_cmd, None, 1) != E.ERR_OK:
        logging.error("ERROR killing %s running on port %d on %s" % (
            replaced_type, server.port(), server.host()))

    # we should make it active
    if not install_utilities.set_install_state(machine, home, "ACTIVE"):
      logging.error("ERROR changing state on machine %s. "
                    "Please make it active and activate and "
                    "start crawl service on it" % machine)
      return 1

    crawl_cmd = (". %s && cd %s/local/google3/enterprise/legacy/scripts && "
                 "./crawl_service.py %s" % (bashrc, home, home))
    if E.execute([machine], "%s %s" % (crawl_cmd, "start"),
                 None, 1) != E.ERR_OK:
      logging.error("Could not start crawl service on %s" % machine)
      return 1

    # save all the params
    self.cfg.saveParams()

    # for faster crawl recovery, lets restart all crawl processes
    self.restart_crawl_processes(serve_cmd)

    # activate the crawl, logcontrol and serve services on the remote
    # machine, in that order; stop at the first one that fails
    for service in ("crawl", "logcontrol", "serve"):
      activate_cmd = ("/etc/rc.d/init.d/%s_%s activate >&/dev/null"
                      "</dev/null" % (service, ver))
      if E.execute([machine],
                   SECURE_WRAPPER_COMMAND % (home, "-e", activate_cmd),
                   None, 0) != E.ERR_OK:
        logging.error("Could not activate %s service on machine %s" % (
            service, machine))
        logging.error("Please activate by hand")
        return 1

    logging.info("Machine %s successfully added into the system" % machine)

    added_msg = M.MSG_MACHINEADDED % machine
    if not mail_already_sent(added_msg):
      SendMail.send(self.cfg, None, false, added_msg, "", true)
    return 0
Exemplo n.º 47
0
  python2_invoke_string = ""
  if FLAGS.use_python2:
    # we want to invoke using python2
    python2_invoke_string = "python2 "

  action = "--start=all"
  if FLAGS.kill_only:
    # we only want to kill and not restart
    action = "--kill=all"

  cmd = ('. %s; cd %s/enterprise/legacy/production/babysitter; '
         ' %s./babysitter.py --batch %s %s --ports=%s --babyalias=localhost '
         '--lockdir=%s/tmp --mailto= --mach=%s %s' % (
            cp.var("BASHRC_FILE"),
            cp.var("MAIN_GOOGLE3_DIR"),
            python2_invoke_string,
            action,
            use_invalid_config,
            FLAGS.port,
            cp.var("ENTERPRISE_HOME"),
            FLAGS.machine,
            cp.GetConfigFileName()
        ))
  logging.info("Executing [%s]" % cmd)
  if E.ERR_OK != E.execute([E.getCrtHostName()], cmd, None, true):
    sys.exit("Error restarting server at %s:%s" % (FLAGS.machine, FLAGS.port))

# Script entry point: only runs when the file is executed directly, and
# hands the complete argument vector (sys.argv) to main().
if __name__ == '__main__':
  main(sys.argv)
Exemplo n.º 48
0
#!/usr/bin/python2.4
#
# This handy command reads global config file on the current machine and
# writes the value of CONFIGVERSION parameter together with the machine name.
# (Used by periodic_script.py)
#
###############################################################################
"""
Usage :
  get_config_version.py <enthome>
"""

import sys
from google3.enterprise.legacy.util import E
from google3.enterprise.legacy.adminrunner import entconfig


if __name__ == '__main__':
  # Without the <enthome> argument there is nothing to load: show the usage
  # text instead of dying with an IndexError traceback.
  if len(sys.argv) < 2:
    sys.exit(__doc__)

  config = entconfig.EntConfig(sys.argv[1])
  # A config that fails to load is reported as an empty version rather than
  # as an error, so the "<version>-<host>" line is always printed.
  if config.Load():
    version = config.var("CONFIGVERSION")
  else:
    version = ""

  # Parenthesised so the line is valid both as a Python 2 print statement
  # and as a Python 3 print() call; the output is identical.
  print("%s-%s" % (version, E.getCrtHostName()))

###############################################################################
Exemplo n.º 49
0
        sys.exit("Unable to load config file  %s" % (cp.GetConfigFileName()))

    use_invalid_config = ""
    if FLAGS.useinvalidconfig:
        use_invalid_config = "--useinvalidconfig"

    python2_invoke_string = ""
    if FLAGS.use_python2:
        # we want to invoke using python2
        python2_invoke_string = "python2 "

    action = "--start=all"
    if FLAGS.kill_only:
        # we only want to kill and not restart
        action = "--kill=all"

    cmd = ('. %s; cd %s/enterprise/legacy/production/babysitter; '
           ' %s./babysitter.py --batch %s %s --ports=%s --babyalias=localhost '
           '--lockdir=%s/tmp --mailto= --mach=%s %s' %
           (cp.var("BASHRC_FILE"), cp.var("MAIN_GOOGLE3_DIR"),
            python2_invoke_string, action, use_invalid_config, FLAGS.port,
            cp.var("ENTERPRISE_HOME"), FLAGS.machine, cp.GetConfigFileName()))
    logging.info("Executing [%s]" % cmd)
    if E.ERR_OK != E.execute([E.getCrtHostName()], cmd, None, true):
        sys.exit("Error restarting server at %s:%s" %
                 (FLAGS.machine, FLAGS.port))


# Script entry point: only runs when the file is executed directly, and
# hands the complete argument vector (sys.argv) to main().
if __name__ == '__main__':
    main(sys.argv)
Exemplo n.º 50
0
def main(argv):
    """Sync configuration files and directories for one machine.

    Expects argv[0] to be a machine name and argv[1] to be a google_config
    file.  Does nothing (exit status 0) when the machine is the current host.

    Otherwise it:
      - copies a fixed list of files (sysconfig, the config file itself,
        license counters, passwd files, server.p12) with CopyFile();
      - copies the SSL certificate and private key with CopyFileAsRoot();
      - replicates <ENTERPRISE_HOME>/local/conf/ (minus some working
        directories) and <GOOGLEDATA>/gws/ with ReplicateDirectory();
      - unless ENT_CONFIG_TYPE is 'PILE', re-runs the reconfigure command
        for NTP and DNS.

    NOTE(review): the copy helpers are defined elsewhere; presumably they
    pull from `machine` onto this host -- confirm the direction.
    """
    if len(argv) < 2:
        # __doc__ is looked up as a global here, i.e. it is the module
        # docstring, not the docstring of this function.
        sys.exit(__doc__)

    machine = argv[0]
    global_file = argv[1]
    global_dir = os.path.dirname(global_file)

    if E.getCrtHostName() == machine:
        logging.info("I do not try to replicate to myself")
        sys.exit(0)

    config = config_factory.ConfigFactory().CreateConfig(global_file)
    if not config.Load():
        sys.exit("ERROR: Cannot read file %s " % global_file)

    # Files synced one by one; all but the first and the vmanager password
    # file live next to the global config file.
    file_to_copy = [
        C.ETC_SYSCONFIG, global_file,
        "%s/lic_counter" % global_dir,
        "%s/lic_counter.bck" % global_dir,
        "%s/passwd" % global_dir,
        "/export/hda3/versionmanager/vmanager_passwd",
        "%s/server.p12" % global_dir
    ]
    # Sync all the files
    for f in file_to_copy:
        CopyFile(machine, f, production_tmp)

    # sync apache certificate
    CopyFileAsRoot(machine,
                   (ssl_cert.CERT_FILENAME % config.var('ENTERPRISE_HOME')),
                   production_tmp, config)

    # sync private key
    CopyFileAsRoot(machine,
                   (ssl_cert.KEY_FILENAME % config.var('ENTERPRISE_HOME')),
                   production_tmp, config)

    # sync the support request repository directory
    # GRR: ReplicateDirectory(machine, SUPPORTREQUEST_RECORDS_DIR)

    # replicate config including per collection/frontend stuff
    # some dirs in conf don't need to be rsync'ed
    # The exclusion list is shorter when the current host can run the GSA
    # master: only the cmr_working result/status directories are left out.
    if core_utils.CanRunGSAMaster(E.getCrtHostName()):
        exclude_patterns = [
            'cmr_working/failure/', 'cmr_working/success/',
            'cmr_working/statusz/'
        ]
    else:
        exclude_patterns = [
            'cmr_working/', 'cmr/', 'fixer/', 'fixer_cmr/', 'gems'
        ]
    ReplicateDirectory(machine,
                       "%s/local/conf/" % config.var('ENTERPRISE_HOME'),
                       exclude_patterns)

    # to replicate per frontend gws stuff (keymatches, gws bad urls)
    ReplicateDirectory(machine, "%s/gws/" % config.var('GOOGLEDATA'))

    # need to adjust NTP if master has changed
    # assumption : this machine is not the master
    if 'PILE' != config.var('ENT_CONFIG_TYPE'):
        ent_home = E.getEnterpriseHome()

        # The exit status of os.system() is not checked for either command.
        os.system("%s NTP %s" % (C.RECONFIGURE_COMMAND % ent_home, machine))

        # also make sure DNS is up to date
        os.system(
            "%s DNS %s %s" %
            (C.RECONFIGURE_COMMAND % ent_home, config.var('BOT_DNS_SERVERS'),
             config.var('DNS_SEARCH_PATH')))