Пример #1
0
 def post_upgrade_restart(self, env, upgrade_type=None):
     """
     Runs after the DataNode restarts during a stack upgrade: verifies that
     the DataNode has started and rejoined the cluster.
     """
     import params

     Logger.info("Executing DataNode Stack Upgrade post-restart")
     env.set_params(params)
     # confirm the restarted DataNode is registered with the NameNode again
     datanode_upgrade.post_upgrade_check(self.get_hdfs_binary())
Пример #2
0
    def service_check(self, env):
        """
        Smoke-tests the Atlas web endpoint on every Atlas host by curling the
        metadata URL as the smoke-test user; kinits first when security is
        enabled. Per-host failures are logged and counted rather than raised.
        """
        import params

        env.set_params(params)

        if params.security_enabled:
            Execute(format(
                "{kinit_path_local} -kt {smokeuser_keytab} {smokeuser_principal}"
            ),
                    user=params.smoke_test_user)
        atlas_host_call_count = 0

        for atlas_host in params.atlas_hosts:
            if params.security_enabled:
                smoke_cmd = format(
                    'curl -k --negotiate -u : -b ~/cookiejar.txt -c ~/cookiejar.txt -s -o /dev/null -w "%{{http_code}}" {metadata_protocol}://{atlas_host}:{metadata_port}/'
                )
            else:
                smoke_cmd = format(
                    'curl -k -s -o /dev/null -w "%{{http_code}}" {metadata_protocol}://{atlas_host}:{metadata_port}/'
                )
            try:
                Execute(smoke_cmd,
                        user=params.smoke_test_user,
                        tries=5,
                        try_sleep=10)
            except Exception as err:  # "except Exception, err" is Py2-only syntax
                # NOTE(review): the failure count is tallied but never acted on
                # in this block -- confirm a caller inspects it, or a final
                # Fail when all hosts are unreachable is needed.
                atlas_host_call_count = atlas_host_call_count + 1
                Logger.error(
                    "ATLAS service check failed for host {0} with error {1}".
                    format(atlas_host, err))
Пример #3
0
 def pre_upgrade_restart(self, env, upgrade_type=None):
     """
     Runs before the DataNode restarts during a stack upgrade; points the
     stack packages at the target version when rolling upgrade is supported.
     """
     import params

     Logger.info("Executing DataNode Stack Upgrade pre-restart")
     env.set_params(params)

     # short-circuit: only probe the stack feature when a version is set
     rolling_supported = params.version and check_stack_feature(
         StackFeature.ROLLING_UPGRADE, params.version)
     if rolling_supported:
         stack_select.select_packages(params.version)
Пример #4
0
 def pre_upgrade_restart(self, env, upgrade_type=None):
     """
     Runs before the DataNode restarts during a stack upgrade; switches the
     hadoop conf and the hadoop-hdfs-datanode package to the target version
     when rolling upgrade is supported.
     """
     import params

     Logger.info("Executing DataNode Stack Upgrade pre-restart")
     env.set_params(params)

     # guard clauses instead of a nested condition; same short-circuit order
     if not params.version:
         return
     if check_stack_feature(StackFeature.ROLLING_UPGRADE, params.version):
         conf_select.select(params.stack_name, "hadoop", params.version)
         stack_select.select("hadoop-hdfs-datanode", params.version)
Пример #5
0
def get_logsearch_meta_configs(configurations):
    """
    Pick out the Log Search metadata entries from a full configurations dict.

    :param configurations: mapping of config-type name -> config value
    :return: dict containing only the entries whose type name ends with
             'logsearch-conf'
    """
    logsearch_meta_configs = {}
    # .items() works on both Python 2 and 3 (iteritems() is Py2-only)
    for key, value in configurations.items():
        if str(key).endswith('logsearch-conf'):
            logsearch_meta_configs[key] = value
            Logger.info("Found logsearch config entry : " + key)
    return logsearch_meta_configs
def get_status(rpc, name):
  """
  Query a supervisord XML-RPC endpoint for the state of a process.

  :param rpc: supervisord XML-RPC URL
  :param name: supervisor process name
  :return: the process state name (e.g. 'RUNNING'), or None when the process
           could not be queried
  """
  # Start from an empty dict so a failed lookup returns None instead of
  # crashing with UnboundLocalError at the return statement (the original
  # bug); matches the defensive info.get(...) pattern used by the
  # supervisord service_check elsewhere in this file.
  info = dict()
  server = xmlrpclib.Server(rpc)
  try:
    info = server.supervisor.getProcessInfo(name)
  except xmlrpclib.Fault:
    Logger.error("supervisor: process %s not found" % name)
  except Exception as msg:
    Logger.error("supervisor: process %s: %s" % (name, str(msg)))
  return info.get('statename', None)
def get_logfeeder_metadata(logsearch_meta_configs):
    """
    Collect the Log Feeder pattern metadata from logsearch config entries.

    :param logsearch_meta_configs: dict of 'service_config_name' -> config dict
    :return: dict of 'service_config_name' -> pattern json content, for every
             entry that carries a 'content' key
    """
    logfeeder_contents = {}
    # .items() works on both Python 2 and 3 (iteritems() is Py2-only);
    # use the yielded value instead of re-indexing the dict on every key
    for key, value in logsearch_meta_configs.items():
        if 'content' in value:
            logfeeder_contents[key] = value['content']
            Logger.info("Found logfeeder pattern content in " + key)
    return logfeeder_contents
Пример #8
0
def __parse_component_mappings(component_mappings):
    """
    Parse a component mapping descriptor of the form
    "component1:logid1,logid2;component2:logid3" into a flat list of log ids.

    :param component_mappings: semicolon-separated "component:logids" pairs,
        where logids is a comma-separated list
    :return: list of all log ids found in the descriptor
    """
    components = list()
    # list comprehension instead of map() so iteration behaves the same on
    # Python 2 and 3 (Py3 map() is a one-shot iterator with no len());
    # str.split always yields a non-empty list, so the original
    # "if x and len(x) > 0" guards were dead code
    metadata_list = [entry.split(':') for entry in component_mappings.split(';')]
    for metadata in metadata_list:
        # entries without exactly one ':' are silently skipped
        if len(metadata) == 2:
            logids = metadata[1].split(',')
            components.extend(logids)
            Logger.info(
                "Found logids for logsearch component %s - (%s) " %
                (metadata[0], metadata[1]))
    return components
Пример #9
0
  def service_check(self, env):
    """
    Smoke-tests Atlas by running the configured smoke command as the metadata
    user (kinit'ing first when security is enabled). A failing smoke command
    is only logged at debug level, not raised.
    """
    import params

    env.set_params(params)

    if params.security_enabled:
      Execute(format("{kinit_path_local} -kt {smokeuser_keytab} {smokeuser_principal}"),
              user=params.metadata_user)

    try:
      Execute(params.smoke_cmd, user=params.metadata_user, tries = 5,
              try_sleep = 10)
      Logger.info('Atlas server up and running')
    except Exception:
      # narrowed from a bare "except:" so SystemExit/KeyboardInterrupt still
      # propagate; the check itself stays best-effort as before
      Logger.debug('Atlas server not running')
Пример #10
0
    def service_check(self, env):
        """
        Smoke-tests Atlas by running the configured smoke command as the
        smoke-test user (kinit'ing first when security is enabled). A failing
        smoke command is only logged at debug level, not raised.
        """
        import params

        env.set_params(params)

        if params.security_enabled:
            Execute(format(
                "{kinit_path_local} -kt {smokeuser_keytab} {smokeuser_principal}"
            ),
                    user=params.smoke_test_user)

        try:
            Execute(params.smoke_cmd,
                    user=params.smoke_test_user,
                    tries=5,
                    try_sleep=10)
            Logger.info('Atlas server up and running')
        except Exception:
            # narrowed from a bare "except:" so SystemExit/KeyboardInterrupt
            # still propagate; the check itself stays best-effort as before
            Logger.debug('Atlas server not running')
Пример #11
0
def producer_service(action='start'):
  """
  Drive the producer process through supervisord: start, stop or restart it
  via XML-RPC, then log the outcome. Supervisor errors are logged, not raised.

  :param action: one of 'start', 'stop', 'restart'
  """
  import params

  process = params.program_name
  server = xmlrpclib.Server(params.producer_rpc)
  status = get_status(params.producer_rpc, process)
  succeeded = False

  try:
    # start/stop are no-ops when the process is already in the target state
    if action == 'start' and status != 'RUNNING':
      succeeded = server.supervisor.startProcess(process)
    elif action == 'stop' and status == 'RUNNING':
      succeeded = server.supervisor.stopProcess(process)
    elif action == 'restart':
      succeeded = server.supervisor.restart()
  except xmlrpclib.Fault:
    Logger.error("supervisor: process %s not found" % process)
  except Exception as msg:
    Logger.error("supervisor: process %s: %s" % (process, str(msg)))

  outcome = 'success' if succeeded else 'failed'
  Logger.info("Action '%s' completed: %s" % (action, outcome))
Пример #12
0
  def service_check(self, env):
    """
    Polls the Atlas HTTP endpoint until a connection succeeds (up to
    ATLAS_CONNECT_TRIES attempts, ATLAS_CONNECT_TIMEOUT seconds apart),
    then requires an HTTP 200 response.

    :raises Fail: when no connection could be established after all retries
    :raises ComponentIsNotRunning: when the server answers but not with 200
    """
    import params

    env.set_params(params)

    for i in xrange(0, self.ATLAS_CONNECT_TRIES):
      try:
        conn = httplib.HTTPConnection(params.metadata_host,
                                      int(params.metadata_port))
        conn.request("GET", format("http://{params.metadata_host}:{params.metadata_port}/"))
      except (httplib.HTTPException, socket.error) as ex:
        if i < self.ATLAS_CONNECT_TRIES - 1:
          time.sleep(self.ATLAS_CONNECT_TIMEOUT)
          Logger.info("Connection failed. Next retry in %s seconds."
                      % (self.ATLAS_CONNECT_TIMEOUT))
          continue
        else:
          raise Fail("Service check has failed.")
      # connected and request sent -- stop retrying (the original had no
      # break here, so it reconnected on every remaining iteration even
      # after a successful request)
      break

    resp = conn.getresponse()
    if resp.status == 200 :
      Logger.info('Atlas server up and running')
    else:
      Logger.debug('Atlas server not running')
      raise ComponentIsNotRunning()
Пример #13
0
    def check_datanode_shutdown(self, hdfs_binary):
        """
        Checks that a DataNode is down by running "hdfs dfsadmin -getDatanodeInfo"
        several times, pausing in between runs. Once the DataNode stops responding
        this method will return, otherwise it will raise a Fail(...) and retry
        automatically.
        The stack defaults for retrying for HDFS are also way too slow for this
        command; they are set to wait about 45 seconds between client retries. As
        a result, a single execution of dfsadmin will take 45 seconds to retry and
        the DataNode may be marked as dead, causing problems with HBase.
        https://issues.apache.org/jira/browse/HDFS-8510 tracks reducing the
        times for ipc.client.connect.retry.interval. In the meantime, override them
        here, but only for RU.
        :param hdfs_binary: name/path of the HDFS binary to use
        :return: True once the DataNode no longer responds
        :raises Fail: while the DataNode is still registered (triggers a retry)
        """
        import params

        # override stock retry timeouts since after 30 seconds, the datanode is
        # marked as dead and can affect HBase during RU
        dfsadmin_base_command = get_dfsadmin_base_command(hdfs_binary)
        command = format(
            '{dfsadmin_base_command} -D ipc.client.connect.max.retries=5 -D ipc.client.connect.retry.interval=1000 -getDatanodeInfo {dfs_dn_ipc_address}'
        )

        is_datanode_deregistered = False
        try:
            shell.checked_call(command, user=params.hdfs_user, tries=1)
        except Exception:
            # narrowed from a bare "except:"; a failing dfsadmin call means the
            # DataNode has stopped answering on its IPC address
            is_datanode_deregistered = True

        if not is_datanode_deregistered:
            Logger.info(
                "DataNode has not yet deregistered from the NameNode...")
            raise Fail(
                'DataNode has not yet deregistered from the NameNode...')

        Logger.info("DataNode has successfully shutdown.")
        return True
Пример #14
0
  def service_check(self, env):
    import params
    env.set_params(params)
    server = xmlrpclib.Server(params.producer_rpc)
    info = dict()
    try:
      info = server.supervisor.getProcessInfo(params.program_name)
    except xmlrpclib.Fault:
      Logger.error("supervisor: process %s not found" % params.program_name)
    except Exception as msg:
      Logger.error("supervisor: process %s: %s" % (params.program_name, str(msg)))

    status = info.get('statename', None)

#    if status != 'RUNNING':
#        raise Fail("supervisor: process %s in %s state" % (params.program_name, status))

    Logger.info("Status of process %s is %s" % (params.program_name, status))
Пример #15
0
 def reload_configs(self, env):
     """Trigger a live configuration reload on the DataNode's IPC endpoint."""
     import params

     env.set_params(params)
     Logger.info("RELOAD CONFIGS")
     reconfig("datanode", params.dfs_dn_ipc_address)
Пример #16
0
def metadata():
    """
    Lay down the Atlas metadata server's runtime directories, deploy the
    metadata.war into the expanded-war directory and render the server's
    configuration files (application.properties, metadata-env.sh, log4j.xml).

    :raises Fail: when the metadata.war to copy does not exist
    """
    import params

    # 0oNNN octal literals are valid on Python 2.6+ AND Python 3; the
    # original bare 0755/0644 forms are a syntax error on Python 3.
    Directory([params.pid_dir],
              mode=0o755,
              cd_access='a',
              owner=params.metadata_user,
              group=params.user_group,
              recursive=True
    )

    Directory(params.conf_dir,
              mode=0o755,
              cd_access='a',
              owner=params.metadata_user,
              group=params.user_group,
              recursive=True
    )

    Directory(params.log_dir,
              mode=0o755,
              cd_access='a',
              owner=params.metadata_user,
              group=params.user_group,
              recursive=True
    )

    # NOTE(review): 0644 on a directory has no execute bit, so it cannot be
    # traversed -- confirm these two modes are intentional.
    Directory(params.data_dir,
              mode=0o644,
              cd_access='a',
              owner=params.metadata_user,
              group=params.user_group,
              recursive=True
    )

    Directory(params.expanded_war_dir,
              mode=0o644,
              cd_access='a',
              owner=params.metadata_user,
              group=params.user_group,
              recursive=True
    )

    # deploy the webapp: fail fast when the packaged war is missing
    metadata_war_file = format('{params.metadata_home}/server/webapp/metadata.war')
    if not os.path.isfile(metadata_war_file):
        raise Fail("Unable to copy {0} because it does not exist".format(metadata_war_file))

    Logger.info("Copying {0} to {1}".format(metadata_war_file, params.expanded_war_dir))
    shutil.copy2(metadata_war_file, params.expanded_war_dir)

    File(format('{conf_dir}/application.properties'),
         content=InlineTemplate(params.application_properties_content),
         mode=0o644,
         owner=params.metadata_user,
         group=params.user_group
    )

    File(format("{conf_dir}/metadata-env.sh"),
         owner=params.metadata_user,
         group=params.user_group,
         mode=0o755,
         content=InlineTemplate(params.metadata_env_content)
    )

    File(format("{conf_dir}/log4j.xml"),
         mode=0o644,
         owner=params.metadata_user,
         group=params.user_group,
         content=StaticFile('log4j.xml')
    )