Example #1
  def status(self, env):
    import status_params
    env.set_params(status_params)
    pid_file = format("{hive_pid_dir}/{hive_pid}")

    # Check the Hive pid file to verify the process is running
    check_process_status(pid_file)
Example #2
    def start(self, env):
        import os, sys, time, errno
        import jkg_toree_params as params
        env.set_params(params)
        self.configure(env)
        delay_checks = 8

        # Need HDFS started for the next step
        helpers.create_hdfs_dirs(params.user, params.group, params.dirs)

        Execute(params.start_command, user=params.user, logoutput=True)
        check_process_status(params.jkg_pid_file)

        time.sleep(delay_checks)

        with open(params.jkg_pid_file, 'r') as fp:
            try:
                os.kill(int(fp.read().strip()), 0)
            except OSError as ose:
                if ose.errno != errno.EPERM:
                    raise Fail(
                        "Error starting Jupyter Kernel Gateway. Check {0} for the possible cause."
                        .format(params.log_dir +
                                "/jupyter_kernel_gateway.log"))
                else:
                    # A non-root install might have to resort to the status check,
                    # with the side effect that any error might only be reflected
                    # during the status check after a minute rather than immediately.
                    check_process_status(params.jkg_pid_file)
Example #3
    def status(self, env):
        import status_params
        env.set_params(status_params)
        pid_file = format("{hive_pid_dir}/{hive_pid}")

        # Check the Hive pid file to verify the process is running
        check_process_status(pid_file)
Example #4
  def status(self, env):
    import status_params

    env.set_params(status_params)
    pid_file = format("{spark_history_server_pid_file}")
    # Check the Spark History Server pid file to verify the process is running
    check_process_status(pid_file)
Example #5
    def status(self, env):
        import status_params
        env.set_params(status_params)

        # The 'llap' status check is not performed here as part of the 'HSI' status
        # check, since the 'llap' status check is a heavyweight operation.

        # Check the Hive Server Interactive pid file to verify the process is running
        check_process_status(status_params.hive_interactive_pid)
Example #6
 def status(self, env):
     import params
     env.set_params(params)
     import os
     if not os.path.exists(self.pid_file):
         Execute(
             "echo `ps aux|grep '/etc/confluent-kafka-mqtt/kafka-mqtt-production.properties' | grep -v grep | awk '{print $2}'` > "
             + self.pid_file)
     check_process_status(self.pid_file)
Example #7
    def status(self, env):
        import status_params
        env.set_params(status_params)

        try:
            pid_file = glob.glob(status_params.dpprofiler_pid_file)[0]
        except IndexError:
            pid_file = ''
        check_process_status(pid_file)
Example #8
 def status(self, env):
     import params
     env.set_params(params)
     import os
     if not os.path.exists(params.pid_file):
         Execute(
             "echo `ps aux|grep 'edp.rider.RiderStarter' | grep -v grep | awk '{print $2}'` > "
             + params.pid_file)
     check_process_status(params.pid_file)
Example #9
 def status(self, env):
     import params
     env.set_params(params)
     import os
     if not os.path.exists(self.pid_file):
         Execute(
             "echo `ps aux|grep '/etc/kafka/server.properties' | grep -v grep | awk '{print $2}'` > "
             + self.pid_file)
     check_process_status(self.pid_file)
Example #10
 def status(self, env):
     import params
     env.set_params(params)
     import os
     if not os.path.exists(self.pid_file):
         Execute(
             "echo `ps aux|grep 'wherehows-backend' | grep -v grep | awk '{print $2}'` > "
             + self.pid_file)
     check_process_status(self.pid_file)
Example #11
    def status(self, env):
        print "check start"
        os.system(
            r"ps -ef | grep org.logstash.Logstash | grep -v grep | awk '{print $2}' > /var/run/logstash.pid"
        )

        import status_params
        env.set_params(status_params)
        check_process_status(status_params.logstash_pid_file)
Example #12
def execute(configurations={}, parameters={}, host_name=None):
    try:
        pid_file = glob.glob(zeppelin_pid_dir + '/zeppelin-*.pid')[0]
        check_process_status(pid_file)
    except ComponentIsNotRunning as ex:
        return (RESULT_CODE_CRITICAL, [str(ex)])
    except:
        return (RESULT_CODE_CRITICAL, ["Zeppelin is not running"])

    return (RESULT_CODE_OK, ["Successful connection to Zeppelin"])
Example #13
  def status(self, env):
    import status_params
    env.set_params(status_params)

    try:
        pid_file = glob.glob(status_params.zeppelin_pid_dir + '/zeppelin-' +
                             status_params.zeppelin_user + '*.pid')[0]
    except IndexError:
        pid_file = ''
    check_process_status(pid_file)
Example #14
    def status(self, env):
        import status_params

        env.set_params(status_params)

        try:
            check_process_status(status_params.nfsgateway_pid_file)
        except ComponentIsNotRunning:
            check_process_status(
                status_params.unprivileged_nfsgateway_pid_file)
Example #15
def execute(configurations={}, parameters={}, host_name=None):
    try:
        pid_file = nifi_pid_dir + '/nifi.pid'
        check_process_status(pid_file)
    except ComponentIsNotRunning as ex:
        return (RESULT_CODE_CRITICAL, [str(ex)])
    except:
        return (RESULT_CODE_CRITICAL, ["Nifi is not running"])

    return (RESULT_CODE_OK, ["Successful connection to Nifi"])
Example #16
def wait_for_znode():
  import params
  import status_params
  
  try:
    check_process_status(status_params.hive_pid)
  except ComponentIsNotRunning:
    raise Exception(format("HiveServer2 is no longer running, check the logs at {hive_log_dir}"))
  
  cmd = format("{zk_bin}/zkCli.sh -server {zk_quorum} ls /{hive_server2_zookeeper_namespace} | grep 'serverUri='")
  code, out = shell.call(cmd)
  if code == 1:
    raise Fail(format("ZooKeeper node /{hive_server2_zookeeper_namespace} is not ready yet"))
Example #17
    def status(self, env):
        import status_params
        env.set_params(status_params)

        if status_params.stack_supports_pid:
            check_process_status(status_params.ranger_kms_pid_file)
            return

        cmd = 'ps -ef | grep proc_rangerkms | grep -v grep'
        code, output = shell.call(cmd, timeout=20)
        if code != 0:
            Logger.debug('KMS process not running')
            raise ComponentIsNotRunning()
        pass
Example #18
def is_monitor_process_live(pid_file):
    """
  Gets whether the Metrics Monitor represented by the specified file is running.
  :param pid_file: the PID file of the monitor to check
  :return: True if the monitor is running, False otherwise
  """
    live = False

    try:
        check_process_status(pid_file)
        live = True
    except ComponentIsNotRunning:
        pass

    return live
Example #19
def is_monitor_process_live(pid_file):
  """
  Gets whether the Metrics Monitor represented by the specified file is running.
  :param pid_file: the PID file of the monitor to check
  :return: True if the monitor is running, False otherwise
  """
  live = False

  try:
    check_process_status(pid_file)
    live = True
  except ComponentIsNotRunning:
    pass

  return live
Example #20
def execute(configurations={}, parameters={}, host_name=None):
    """
    Returns a tuple containing the result code and a pre-formatted result label

    Keyword arguments:
    configurations (dictionary): a mapping of configuration key to value
    parameters (dictionary): a mapping of script parameter key to value
    host_name (string): the name of this host where the alert is running
    """

    try:
        beacon_pid_dir = configurations[BEACON_PID_DIR_KEY]
        beacon_pid_file = os.path.join(beacon_pid_dir, "beacon.pid")
        check_process_status(beacon_pid_file)
        return (RESULT_CODE_OK, ["Beacon Server process is running"])
    except:
        return (RESULT_CODE_CRITICAL, ['Beacon Server process is not running'])
Example #21
def check_service_status(name):
  from resource_management.libraries.functions.check_process_status import check_process_status
  if name=='collector':
    pid_file = format("{ams_collector_pid_dir}/ambari-metrics-collector.pid")
    check_process_status(pid_file)
    pid_file = format("{hbase_pid_dir}/hbase-{hbase_user}-master.pid")
    check_process_status(pid_file)
    if os.path.exists(format("{hbase_pid_dir}/distributed_mode")):
      pid_file = format("{hbase_pid_dir}/hbase-{hbase_user}-regionserver.pid")
      check_process_status(pid_file)

  elif name == 'monitor':
    pid_file = format("{ams_monitor_pid_dir}/ambari-metrics-monitor.pid")
    check_process_status(pid_file)

  elif name == 'grafana':
    pid_file = format("{ams_grafana_pid_dir}/grafana-server.pid")
    check_process_status(pid_file)
Example #22
  def status(self, env):
    import status_params

    env.set_params(status_params)

    if status_params.stack_supports_pid:
      check_process_status(status_params.ranger_admin_pid_file)
      return

    cmd = 'ps -ef | grep proc_rangeradmin | grep -v grep'
    code, output = shell.call(cmd, timeout=20)

    if code != 0:
      if self.is_ru_rangeradmin_in_progress(status_params.upgrade_marker_file):
        Logger.info('Ranger admin process not running - skipping as stack upgrade is in progress')
      else:
        Logger.debug('Ranger admin process not running')
        raise ComponentIsNotRunning()
    pass
Example #23
def snamenode(action=None, format=False):
    if action == "configure":
        import params
        for fs_checkpoint_dir in params.fs_checkpoint_dirs:
            Directory(fs_checkpoint_dir,
                      create_parents=True,
                      cd_access="a",
                      mode=0755,
                      owner=params.hdfs_user,
                      group=params.user_group)
        File(params.exclude_file_path,
             content=Template("exclude_hosts_list.j2"),
             owner=params.hdfs_user,
             group=params.user_group)
    elif action == "start" or action == "stop":
        import params
        service(action=action,
                name="secondarynamenode",
                user=params.hdfs_user,
                create_pid_dir=True,
                create_log_dir=True)
    elif action == "status":
        import status_params
        check_process_status(status_params.snamenode_pid_file)
Example #24
  def status(self, env):
    import params

    if OSCheck.is_suse_family():
      try:
        Execute('checkproc `which krb5kdc`')
        Execute('checkproc `which kadmind`')
      except Fail as ex:
        raise ComponentIsNotRunning()

    elif OSCheck.is_ubuntu_family():
      check_process_status(params.kdamin_pid_path)
      check_process_status(params.krb5kdc_pid_path)

    else:
      check_process_status(params.kdamin_pid_path)
      check_process_status(params.krb5kdc_pid_path)
Example #25
def observer_namenode(action=None, format=False):
    if action == "configure":
        import params
        for fs_checkpoint_dir in params.fs_checkpoint_dirs:
            Directory(
                fs_checkpoint_dir,
                create_parents=True,
                cd_access="a",
                mode=0755,
                owner=params.hdfs_user,
                group=params.user_group)
        File(
            params.exclude_file_path,
            content=Template("exclude_hosts_list.j2"),
            owner=params.hdfs_user,
            group=params.user_group)
        if params.hdfs_include_file:
            File(
                params.include_file_path,
                content=Template("include_hosts_list.j2"),
                owner=params.hdfs_user,
                group=params.user_group)
        generate_logfeeder_input_config(
            'hdfs',
            Template("input.config-hdfs.json.j2", extra_imports=[default]))
    elif action == "start" or action == "stop":
        import params
        service(
            action=action,
            name="observernamenode",
            user=params.hdfs_user,
            create_pid_dir=True,
            create_log_dir=True)
    elif action == "status":
        import status_params
        check_process_status(status_params.snamenode_pid_file)
Example #26
def check_service_status(env, name):
    import status_params
    env.set_params(status_params)

    if name == 'collector':
        for pid_file in get_collector_pid_files():
            check_process_status(pid_file)
    elif name == 'monitor':
        check_process_status(status_params.monitor_pid_file)
    elif name == 'grafana':
        check_process_status(status_params.grafana_pid_file)
Example #27
def check_service_status(env, name):
    import status_params
    env.set_params(status_params)

    from resource_management.libraries.functions.check_process_status import check_process_status
    if name == 'collector':
        for pid_file in get_collector_pid_files():
            check_process_status(pid_file)
    elif name == 'monitor':
        check_process_status(status_params.monitor_pid_file)
    elif name == 'grafana':
        check_process_status(status_params.grafana_pid_file)
Example #28
  def status(self, env):
    import status_params

    env.set_params(status_params)

    check_process_status(status_params.zkfc_pid_file)
Example #29
 def status(self, env):
     import status_params
     check_process_status(status_params.nifi_ca_pid_file)
Example #30
 def status(self, env):
     import status_params
     env.set_params(status_params)
     check_process_status(status_params.spark_thrift_server_pid_file)
Example #31
 def status(self, env):
     print "checking status..."
     check_process_status(self.PID_CONFIG_FILE)
Example #32
 def status(self, env):
     import status_params
     check_process_status(status_params.registry_pid_file)
Example #33
def check_oozie_server_status():
  import status_params
  from resource_management.libraries.functions.check_process_status import check_process_status

  check_process_status(status_params.pid_file)
Example #34
    def status(self, env):
        import status_params

        env.set_params(status_params)

        check_process_status(status_params.spark_history_server_pid_file)
Example #35
 def status(self, env):
   import status_params as params
   env.set_params(params)
   pid_file = format("{pid_dir}/TachyonMaster.pid")
   check_process_status(pid_file)
Example #36
 def status(self, env):
   check_process_status('/var/run/datatorrent/dtgateway.pid')
Example #37
  def status(self, env):
    import status_params

    env.set_params(status_params)
    check_process_status(status_params.resourcemanager_pid_file)
    pass
Example #38
  def status(self, env):
    import status_params

    env.set_params(status_params)
    check_process_status(status_params.namenode_pid_file)
    pass
Example #39
def namenode(action=None, hdfs_binary=None, do_format=True, upgrade_type=None, env=None):
  if action is None:
    raise Fail('"action" parameter is required for function namenode().')

  if action in ["start", "stop"] and hdfs_binary is None:
    raise Fail('"hdfs_binary" parameter is required for function namenode().')

  if action == "configure":
    import params
    # We need this directory to be present before any action (HA manual steps
    # for an additional NameNode).
    create_name_dirs(params.dfs_name_dir)
  elif action == "start":
    Logger.info("Called service {0} with upgrade_type: {1}".format(action, str(upgrade_type)))
    setup_ranger_hdfs(upgrade_type=upgrade_type)
    import params
    if do_format and not params.hdfs_namenode_format_disabled:
      format_namenode()
      pass

    File(params.exclude_file_path,
         content=Template("exclude_hosts_list.j2"),
         owner=params.hdfs_user,
         group=params.user_group
    )

    if params.dfs_ha_enabled and \
      params.dfs_ha_namenode_standby is not None and \
      params.hostname == params.dfs_ha_namenode_standby:
        # if the current host is the standby NameNode in an HA deployment
        # run the bootstrap command, to start the NameNode in standby mode
        # this requires that the active NameNode is already up and running,
        # so this execute should be re-tried upon failure, up to a timeout
        success = bootstrap_standby_namenode(params)
        if not success:
          raise Fail("Could not bootstrap standby namenode")

    if upgrade_type == "rolling" and params.dfs_ha_enabled:
      # Most likely, ZKFC is up since RU will initiate the failover command. However, if that failed, it would have tried
      # to kill ZKFC manually, so we need to start it if not already running.
      safe_zkfc_op(action, env)

    options = ""
    if upgrade_type == "rolling":
      if params.upgrade_direction == Direction.UPGRADE:
        options = "-rollingUpgrade started"
      elif params.upgrade_direction == Direction.DOWNGRADE:
        options = "-rollingUpgrade downgrade"
        
    elif upgrade_type == "nonrolling":
      is_previous_image_dir = is_previous_fs_image()
      Logger.info(format("Previous file system image dir present is {is_previous_image_dir}"))

      if params.upgrade_direction == Direction.UPGRADE:
        options = "-rollingUpgrade started"
      elif params.upgrade_direction == Direction.DOWNGRADE:
        options = "-rollingUpgrade downgrade"

    Logger.info(format("Option for start command: {options}"))

    service(
      action="start",
      name="namenode",
      user=params.hdfs_user,
      options=options,
      create_pid_dir=True,
      create_log_dir=True
    )

    if params.security_enabled:
      Execute(format("{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name}"),
              user = params.hdfs_user)

    if params.dfs_ha_enabled:
      is_active_namenode_cmd = as_user(format("{hdfs_binary} --config {hadoop_conf_dir} haadmin -getServiceState {namenode_id} | grep active"), params.hdfs_user, env={'PATH':params.hadoop_bin_dir})
    else:
      is_active_namenode_cmd = True
    
    # During NonRolling Upgrade, both NameNodes are initially down,
    # so no point in checking if this is the active or standby.
    if upgrade_type == "nonrolling":
      is_active_namenode_cmd = False

    # ___Scenario___________|_Expected safemode state__|_Wait for safemode OFF____|
    # no-HA                 | ON -> OFF                | Yes                      |
    # HA and active         | ON -> OFF                | Yes                      |
    # HA and standby        | no change                | no check                 |
    # RU with HA on active  | ON -> OFF                | Yes                      |
    # RU with HA on standby | ON -> OFF                | Yes                      |
    # EU with HA on active  | no change                | no check                 |
    # EU with HA on standby | no change                | no check                 |
    # EU non-HA             | no change                | no check                 |

    check_for_safemode_off = False
    msg = ""
    if params.dfs_ha_enabled:
      if upgrade_type is not None:
        check_for_safemode_off = True
        msg = "Must wait to leave safemode since High Availability is enabled during a Stack Upgrade"
      else:
        Logger.info("Wait for NameNode to become active.")
        if is_active_namenode(hdfs_binary): # active
          check_for_safemode_off = True
          msg = "Must wait to leave safemode since High Availability is enabled and this is the Active NameNode."
        else:
          msg = "Will remain in the current safemode state."
    else:
      msg = "Must wait to leave safemode since High Availability is not enabled."
      check_for_safemode_off = True

    Logger.info(msg)

    # During a NonRolling (aka Express Upgrade), stay in safemode since the DataNodes are down.
    stay_in_safe_mode = False
    if upgrade_type == "nonrolling":
      stay_in_safe_mode = True

    if check_for_safemode_off:
      Logger.info("Stay in safe mode: {0}".format(stay_in_safe_mode))
      if not stay_in_safe_mode:
        wait_for_safemode_off(hdfs_binary)

    # Always run this on non-HA, or active NameNode during HA.
    create_hdfs_directories(is_active_namenode_cmd)
    create_ranger_audit_hdfs_directories(is_active_namenode_cmd)

  elif action == "stop":
    import params
    service(
      action="stop", name="namenode", 
      user=params.hdfs_user
    )
  elif action == "status":
    import status_params
    check_process_status(status_params.namenode_pid_file)
  elif action == "decommission":
    decommission()
Example #40
  def status(self, env):
    import status_params

    env.set_params(status_params)
    check_process_status(status_params.pid_file)
Example #41
  def status(self, env):
    import status_params

    env.set_params(status_params)

    check_process_status(status_params.nfsgateway_pid_file)
Example #42
 def status(self, env):
     import params
     env.set_params(params)
     pid_file = params.mysql_pid_file
     check_process_status(pid_file)
Example #43
 def status(self, env):
   from hawqstatus import get_pid_file
   check_process_status(get_pid_file())