Exemplo n.º 1
0
def namenode(action=None,
             hdfs_binary=None,
             do_format=True,
             upgrade_type=None,
             env=None):
    """Run one lifecycle operation against the HDFS NameNode.

    Supported values of ``action`` are "configure", "start", "stop", "status"
    and "decommission". Any other non-None value is ignored and the function
    returns None without doing anything.

    :param action: operation to perform; must not be None.
    :param hdfs_binary: must not be None for "start" and "stop". Inside this
        function it is only referenced by name from format() templates.
    :param do_format: when true, "start" calls format_namenode() first.
    :param upgrade_type: None for a regular start, or "rolling" /
        "nonrolling" while a stack upgrade is in progress.
    :param env: forwarded to safe_zkfc_op() on a rolling upgrade with HA.
    :raises Fail: when ``action`` is None, when ``hdfs_binary`` is missing for
        "start"/"stop", when the standby bootstrap reports failure, or when a
        nonrolling HA start has no ``desired_namenode_role``.
    """
    if action is None:
        raise Fail('"action" parameter is required for function namenode().')

    requires_binary = action in ("start", "stop")
    if requires_binary and hdfs_binary is None:
        raise Fail(
            '"hdfs_binary" parameter is required for function namenode().')

    if action == "configure":
        import params
        # The name directories have to exist before any other action runs
        # (needed by the manual HA steps for an additional NameNode).
        create_name_dirs(params.dfs_name_dir)
        return

    if action == "start":
        Logger.info("Called service {0} with upgrade_type: {1}".format(
            action, str(upgrade_type)))
        setup_ranger_hdfs(upgrade_type=upgrade_type)
        import params

        rolling_upgrade = upgrade_type == "rolling"
        express_upgrade = upgrade_type == "nonrolling"

        if do_format:
            format_namenode()

        File(params.exclude_file_path,
             content=Template("exclude_hosts_list.j2"),
             owner=params.hdfs_user,
             group=params.user_group)

        # On the standby host of an HA pair the NameNode is bootstrapped
        # first. That needs the active NameNode to be up already, so the
        # helper is expected to retry until a timeout.
        on_standby_host = (params.dfs_ha_enabled
                           and params.dfs_ha_namenode_standby is not None
                           and params.hostname == params.dfs_ha_namenode_standby)
        if on_standby_host:
            if not bootstrap_standby_namenode(params):
                raise Fail("Could not bootstrap standby namenode")

        if rolling_upgrade and params.dfs_ha_enabled:
            # ZKFC is most likely running because the rolling upgrade
            # triggers the failover itself; if that failed ZKFC may have been
            # killed manually, so make sure it is started.
            safe_zkfc_op(action, env)

        # NOTE(review): format() is called with a template only, so it
        # presumably resolves {placeholders} from local variables and params.
        # Keep the local names `options` and `is_previous_image_dir` as they
        # are - confirm before renaming.
        options = ""
        if rolling_upgrade:
            options = "-rollingUpgrade started"
        elif express_upgrade:
            is_previous_image_dir = is_previous_fs_image()
            Logger.info(
                format(
                    "Previous file system image dir present is {is_previous_image_dir}"
                ))

            # "-upgrade" may be used exactly once; repeating it on a retry
            # causes problems, hence the check for a previous image dir.
            upgrade_flag = "" if is_previous_image_dir else "-upgrade"

            if not params.dfs_ha_enabled:
                # Primary and Secondary NameNode share the same command.
                options = upgrade_flag
            else:
                if params.desired_namenode_role is None:
                    raise Fail(
                        "Did not receive parameter \"desired_namenode_role\" to indicate the role that this NameNode should have."
                    )

                if params.desired_namenode_role == "active":
                    options = upgrade_flag

                if params.desired_namenode_role == "standby":
                    options = "-bootstrapStandby -force"
        Logger.info(format("Option for start command: {options}"))

        service(action="start",
                name="namenode",
                user=params.hdfs_user,
                options=options,
                create_pid_dir=True,
                create_log_dir=True)

        if params.security_enabled:
            kinit_cmd = format(
                "{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name}"
            )
            Execute(kinit_cmd, user=params.hdfs_user)

        # Shell pipeline that exits 0 only once safemode is reported OFF.
        safemode_off_probe = format(
            "{hdfs_binary} dfsadmin -fs {namenode_address} -safemode get | grep 'Safe mode is OFF'"
        )

        # Command that exits 0 when this NameNode is the active one (HA only).
        # NOTE(review): non-HA passes False to create_hdfs_directories()
        # below although the closing comment says it should always run on
        # non-HA - verify how that helper treats False.
        active_nn_probe = False
        if params.dfs_ha_enabled:
            active_nn_probe = as_user(format(
                "{hdfs_binary} --config {hadoop_conf_dir} haadmin -getServiceState {namenode_id} | grep active"
            ),
                                      params.hdfs_user,
                                      env={
                                          'PATH': params.hadoop_bin_dir
                                      })

        # During a NonRolling upgrade both NameNodes start out down, so there
        # is no point in asking which one is active.
        if express_upgrade:
            active_nn_probe = False

        # ___Scenario___________|_Expected safemode state__|_Wait for safemode OFF____|
        # no-HA                 | ON -> OFF                | Yes                      |
        # HA and active         | ON -> OFF                | Yes                      |
        # HA and standby        | no change                | no check                 |
        # RU with HA on active  | ON -> OFF                | Yes                      |
        # RU with HA on standby | ON -> OFF                | Yes                      |
        # EU with HA on active  | no change                | no check                 |
        # EU with HA on standby | no change                | no check                 |
        # EU non-HA             | no change                | no check                 |
        if not params.dfs_ha_enabled:
            reason = "Must wait to leave safemode since High Availability is not enabled."
            must_check_safemode = True
        elif upgrade_type is not None:
            must_check_safemode = True
            reason = "Must wait to leave safemode since High Availability is enabled during a Stack Upgrade"
        else:
            # Regular operation: the NameNode is expected to be up, so ask it
            # for its HA state; exit code 0 means "active".
            exit_code, _ = shell.call(active_nn_probe, logoutput=True)
            if exit_code == 0:
                must_check_safemode = True
                reason = "Must wait to leave safemode since High Availability is enabled and this is the Active NameNode."
            else:
                must_check_safemode = False
                reason = "Will remain in the current safemode state."

        Logger.info(reason)

        # A NonRolling (Express) upgrade stays in safemode because the
        # DataNodes are still down.
        stay_in_safe_mode = False
        if express_upgrade:
            stay_in_safe_mode = True

        if must_check_safemode:
            Logger.info("Stay in safe mode: {0}".format(stay_in_safe_mode))
            if not stay_in_safe_mode:
                Logger.info(
                    "Wait to leafe safemode since must transition from ON to OFF."
                )
                try:
                    # 180 tries x 10 s sleep: wait up to 30 minutes.
                    Execute(safemode_off_probe,
                            tries=180,
                            try_sleep=10,
                            user=params.hdfs_user,
                            logoutput=True)
                except Fail:
                    # Best effort: a NameNode stuck in safemode is reported
                    # but does not abort the start.
                    Logger.error(
                        "NameNode is still in safemode, please be careful with commands that need safemode OFF."
                    )

        # Always run this on non-HA, or active NameNode during HA.
        create_hdfs_directories(active_nn_probe)
        return

    if action == "stop":
        import params
        service(action="stop", name="namenode", user=params.hdfs_user)
        return

    if action == "status":
        import status_params
        check_process_status(status_params.namenode_pid_file)
        return

    if action == "decommission":
        decommission()
Exemplo n.º 2
0
def namenode(action=None,
             hdfs_binary=None,
             do_format=True,
             upgrade_type=None,
             upgrade_suspended=False,
             env=None):
    """Run one lifecycle operation against the HDFS NameNode.

    Supported values of ``action`` are "configure", "start", "stop", "status"
    and "decommission". Any other non-None value is ignored and the function
    returns None.

    :param action: operation to perform; must not be None.
    :param hdfs_binary: must not be None for "start" and "stop"; passed to
        wait_for_safemode_off() on start.
    :param do_format: when true (and formatting is not disabled through
        ``params.hdfs_namenode_format_disabled``), "start" calls
        format_namenode() before starting the service.
    :param upgrade_type: None for a regular start, otherwise one of the
        ``constants.UPGRADE_TYPE_*`` values.
    :param upgrade_suspended: when True and ``upgrade_type`` is None, the
        NameNode is started with "-rollingUpgrade started" if the
        upgrade-in-progress marker file exists.
    :param env: forwarded to safe_zkfc_op() on a rolling upgrade with HA.
    :raises Fail: when ``action`` is None, when ``hdfs_binary`` is missing for
        "start"/"stop", or when the standby bootstrap reports failure.
    """

    if action is None:
        raise Fail('"action" parameter is required for function namenode().')

    if action in ["start", "stop"] and hdfs_binary is None:
        raise Fail(
            '"hdfs_binary" parameter is required for function namenode().')

    if action == "configure":
        import params
        # we need this directory to be present before any action (HA manual
        # steps for an additional namenode)
        create_name_dirs(params.dfs_name_dir)

        # set up failover / secure ZooKeeper ACLs; this feature is supported
        # from HDP 2.6 onwards
        set_up_zkfc_security(params)
    elif action == "start":
        Logger.info("Called service {0} with upgrade_type: {1}".format(
            action, str(upgrade_type)))
        setup_ranger_hdfs(upgrade_type=upgrade_type)
        import params

        File(params.exclude_file_path,
             content=Template("exclude_hosts_list.j2"),
             owner=params.hdfs_user,
             group=params.user_group)

        if do_format and not params.hdfs_namenode_format_disabled:
            format_namenode()

        if params.dfs_ha_enabled and \
          params.dfs_ha_namenode_standby is not None and \
          params.hostname == params.dfs_ha_namenode_standby:
            # if the current host is the standby NameNode in an HA deployment
            # run the bootstrap command, to start the NameNode in standby mode
            # this requires that the active NameNode is already up and running,
            # so this execute should be re-tried upon failure, up to a timeout
            success = bootstrap_standby_namenode(params)
            if not success:
                raise Fail("Could not bootstrap standby namenode")

        if upgrade_type == constants.UPGRADE_TYPE_ROLLING and params.dfs_ha_enabled:
            # Most likely, ZKFC is up since RU will initiate the failover command. However, if that failed, it would have tried
            # to kill ZKFC manually, so we need to start it if not already running.
            safe_zkfc_op(action, env)

        # Extra command-line options handed to the NameNode start command.
        options = ""
        if upgrade_type == constants.UPGRADE_TYPE_ROLLING:
            if params.upgrade_direction == Direction.UPGRADE:
                options = "-rollingUpgrade started"
            elif params.upgrade_direction == Direction.DOWNGRADE:
                options = "-rollingUpgrade downgrade"
        elif upgrade_type == constants.UPGRADE_TYPE_NON_ROLLING:
            # The previous-image check is only logged here; it does not
            # influence the chosen options.
            is_previous_image_dir = is_previous_fs_image()
            Logger.info("Previous file system image dir present is {0}".format(
                str(is_previous_image_dir)))

            if params.upgrade_direction == Direction.UPGRADE:
                options = "-rollingUpgrade started"
            elif params.upgrade_direction == Direction.DOWNGRADE:
                options = "-rollingUpgrade downgrade"
        elif upgrade_type == constants.UPGRADE_TYPE_HOST_ORDERED:
            # nothing special to do for HOU - should be very close to a normal restart
            pass
        elif upgrade_type is None and upgrade_suspended is True:
            # the rollingUpgrade flag must be passed in during a suspended upgrade when starting NN
            if os.path.exists(
                    namenode_upgrade.get_upgrade_in_progress_marker()):
                options = "-rollingUpgrade started"
            else:
                Logger.info(
                    "The NameNode upgrade marker file {0} does not exist, yet an upgrade is currently suspended. "
                    "Assuming that the upgrade of NameNode has not occurred yet."
                    .format(namenode_upgrade.get_upgrade_in_progress_marker()))

        Logger.info("Options for start command are: {0}".format(options))

        service(action="start",
                name="namenode",
                user=params.hdfs_user,
                options=options,
                create_pid_dir=True,
                create_log_dir=True)

        if params.security_enabled:
            # NOTE(review): format() receives only the template, so the
            # placeholders are presumably resolved from params - confirm.
            Execute(format(
                "{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name}"
            ),
                    user=params.hdfs_user)

        # ___Scenario___________|_Expected safemode state__|_Wait for safemode OFF____|
        # no-HA                 | ON -> OFF                | Yes                      |
        # HA and active         | ON -> OFF                | Yes                      |
        # HA and standby        | no change                | No                       |
        # RU with HA on active  | ON -> OFF                | Yes                      |
        # RU with HA on standby | ON -> OFF                | Yes                      |
        # EU with HA on active  | ON -> OFF                | No                       |
        # EU with HA on standby | ON -> OFF                | No                       |
        # EU non-HA             | ON -> OFF                | No                       |

        # because we do things like create directories after starting NN,
        # the vast majority of the time this should be True - it should only
        # be False if this is HA and we are the Standby NN
        ensure_safemode_off = True

        # True if this is the only NameNode (non-HA) or if it is the Active one in HA
        is_active_namenode = True

        if params.dfs_ha_enabled:
            Logger.info(
                "Waiting for the NameNode to broadcast whether it is Active or Standby..."
            )

            if is_this_namenode_active() is False:
                # we are the STANDBY NN
                is_active_namenode = False

                # we are the STANDBY NN and this restart is not part of an upgrade
                if upgrade_type is None:
                    ensure_safemode_off = False

        # During an Express Upgrade, NameNode will not leave SafeMode until the DataNodes are started,
        # so always disable the Safemode check
        if upgrade_type == constants.UPGRADE_TYPE_NON_ROLLING:
            ensure_safemode_off = False

        # some informative logging separate from the above logic to keep things a little cleaner
        if ensure_safemode_off:
            Logger.info(
                "Waiting for this NameNode to leave Safemode due to the following conditions: HA: {0}, isActive: {1}, upgradeType: {2}"
                .format(params.dfs_ha_enabled, is_active_namenode,
                        upgrade_type))
        else:
            Logger.info(
                "Skipping Safemode check due to the following conditions: HA: {0}, isActive: {1}, upgradeType: {2}"
                .format(params.dfs_ha_enabled, is_active_namenode,
                        upgrade_type))

        # wait for Safemode to end
        if ensure_safemode_off:
            if params.rolling_restart and params.rolling_restart_safemode_exit_timeout:
                # One retry per 30 s sleep. Floor division keeps the retry
                # count an integer even where "/" is true division
                # (Python 3, or "from __future__ import division").
                calculated_retries = int(
                    params.rolling_restart_safemode_exit_timeout) // 30
                wait_for_safemode_off(hdfs_binary,
                                      afterwait_sleep=30,
                                      retries=calculated_retries,
                                      sleep_seconds=30)
            else:
                wait_for_safemode_off(hdfs_binary)

        # Always run this on the "Active" NN unless Safemode has been ignored
        # in the case where safemode was ignored (like during an express upgrade), then
        # NN will be in SafeMode and cannot have directories created
        if is_active_namenode and ensure_safemode_off:
            create_hdfs_directories()
            create_ranger_audit_hdfs_directories()
        else:
            Logger.info(
                "Skipping creation of HDFS directories since this is either not the Active NameNode or we did not wait for Safemode to finish."
            )

    elif action == "stop":
        import params
        service(action="stop", name="namenode", user=params.hdfs_user)
    elif action == "status":
        import status_params
        check_process_status(status_params.namenode_pid_file)
    elif action == "decommission":
        decommission()
Exemplo n.º 3
0
def namenode(action=None, do_format=True, rolling_restart=False, env=None):
    """Run one lifecycle operation against the HDFS NameNode.

    Handles the actions "configure", "start", "stop" and "decommission"; any
    other value of ``action`` does nothing. The ``params`` module is imported
    on every call, whatever the action.

    :param action: operation to perform.
    :param do_format: when true, "start" calls format_namenode() first.
    :param rolling_restart: when true, the NameNode is started with
        "-rollingUpgrade started" and safe_zkfc_op() is invoked beforehand.
    :param env: forwarded to safe_zkfc_op() on a rolling restart.
    :raises Fail: when the standby bootstrap reports failure.
    """
    import params
    # we need this directory to be present before any action (HA manual steps
    # for an additional namenode)
    if action == "configure":
        create_name_dirs(params.dfs_name_dir)

    if action == "start":
        if do_format:
            format_namenode()

        File(params.exclude_file_path,
             content=Template("exclude_hosts_list.j2"),
             owner=params.hdfs_user,
             group=params.user_group)

        # "0o755" is the same value as the legacy "0755" literal, but is
        # accepted by both Python 2.6+ and Python 3.
        Directory(params.hadoop_pid_dir_prefix,
                  mode=0o755,
                  owner=params.hdfs_user,
                  group=params.user_group)

        if params.dfs_ha_enabled and \
          params.dfs_ha_namenode_standby is not None and \
          params.hostname == params.dfs_ha_namenode_standby:
            # if the current host is the standby NameNode in an HA deployment
            # run the bootstrap command, to start the NameNode in standby mode
            # this requires that the active NameNode is already up and running,
            # so this execute should be re-tried upon failure, up to a timeout
            success = bootstrap_standby_namenode(params)
            if not success:
                raise Fail("Could not bootstrap standby namenode")

        options = "-rollingUpgrade started" if rolling_restart else ""

        if rolling_restart:
            # Must start Zookeeper Failover Controller if it exists on this host because it could have been killed in order to initiate the failover.
            safe_zkfc_op(action, env)

        service(action="start",
                name="namenode",
                user=params.hdfs_user,
                options=options,
                create_pid_dir=True,
                create_log_dir=True)

        if params.security_enabled:
            # NOTE(review): format() receives only the template, so the
            # placeholders are presumably resolved from params - confirm.
            Execute(format(
                "{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name}"
            ),
                    user=params.hdfs_user)

        # Command that exits 0 when this NameNode reports itself as active;
        # None when HA is disabled.
        if params.dfs_ha_enabled:
            dfs_check_nn_status_cmd = as_user(format(
                "hdfs --config {hadoop_conf_dir} haadmin -getServiceState {namenode_id} | grep active"
            ),
                                              params.hdfs_user,
                                              env={
                                                  'PATH': params.hadoop_bin_dir
                                              })
        else:
            dfs_check_nn_status_cmd = None

        # Shell pipeline that exits 0 only when safemode is reported OFF.
        namenode_safe_mode_off = format(
            "hadoop dfsadmin -fs {namenode_address} -safemode get | grep 'Safe mode is OFF'"
        )

        # If HA is enabled and it is in standby, then stay in safemode, otherwise, leave safemode.
        leave_safe_mode = True
        if dfs_check_nn_status_cmd is not None:
            code, out = shell.call(
                dfs_check_nn_status_cmd)  # If active NN, code will be 0
            if code != 0:
                leave_safe_mode = False

        if leave_safe_mode:
            # First check if Namenode is not in 'safemode OFF' (equivalent to safemode ON), if so, then leave it
            code, out = shell.call(namenode_safe_mode_off)
            if code != 0:
                leave_safe_mode_cmd = format(
                    "hdfs --config {hadoop_conf_dir} dfsadmin -fs {namenode_address} -safemode leave"
                )
                Execute(
                    leave_safe_mode_cmd,
                    user=params.hdfs_user,
                    path=[params.hadoop_bin_dir],
                )

        # Verify if Namenode should be in safemode OFF
        # (40 tries x 10 s sleep between tries).
        Execute(
            namenode_safe_mode_off,
            tries=40,
            try_sleep=10,
            path=[params.hadoop_bin_dir],
            user=params.hdfs_user,
            only_if=dfs_check_nn_status_cmd  # skip when HA not active
        )
        create_hdfs_directories(dfs_check_nn_status_cmd)

    if action == "stop":
        service(action="stop", name="namenode", user=params.hdfs_user)

    if action == "decommission":
        decommission()
Exemplo n.º 4
0
def namenode(action=None,
             hdfs_binary=None,
             do_format=True,
             upgrade_type=None,
             env=None):
    """Run one lifecycle operation against the HDFS NameNode.

    Supported values of ``action`` are "configure", "start", "stop", "status"
    and "decommission". Any other non-None value is ignored and the function
    returns None.

    :param action: operation to perform; must not be None.
    :param hdfs_binary: must not be None for "start" and "stop"; passed to
        is_active_namenode() and wait_for_safemode_off() on start.
    :param do_format: when true (and formatting is not disabled through
        ``params.hdfs_namenode_format_disabled``), "start" calls
        format_namenode() first.
    :param upgrade_type: None for a regular start, or "rolling" /
        "nonrolling" while a stack upgrade is in progress.
    :param env: forwarded to safe_zkfc_op() on a rolling upgrade with HA.
    :raises Fail: when ``action`` is None, when ``hdfs_binary`` is missing for
        "start"/"stop", or when the standby bootstrap reports failure.
    """
    if action is None:
        raise Fail('"action" parameter is required for function namenode().')

    requires_binary = action in ("start", "stop")
    if requires_binary and hdfs_binary is None:
        raise Fail(
            '"hdfs_binary" parameter is required for function namenode().')

    if action == "configure":
        import params
        # The name directories have to exist before any other action runs
        # (needed by the manual HA steps for an additional NameNode).
        create_name_dirs(params.dfs_name_dir)
        return

    if action == "start":
        Logger.info("Called service {0} with upgrade_type: {1}".format(
            action, str(upgrade_type)))
        setup_ranger_hdfs(upgrade_type=upgrade_type)
        import params

        rolling_upgrade = upgrade_type == "rolling"
        express_upgrade = upgrade_type == "nonrolling"

        if do_format and not params.hdfs_namenode_format_disabled:
            format_namenode()

        File(params.exclude_file_path,
             content=Template("exclude_hosts_list.j2"),
             owner=params.hdfs_user,
             group=params.user_group)

        # On the standby host of an HA pair the NameNode is bootstrapped
        # first. That needs the active NameNode to be up already, so the
        # helper is expected to retry until a timeout.
        on_standby_host = (params.dfs_ha_enabled
                           and params.dfs_ha_namenode_standby is not None
                           and params.hostname == params.dfs_ha_namenode_standby)
        if on_standby_host:
            if not bootstrap_standby_namenode(params):
                raise Fail("Could not bootstrap standby namenode")

        if rolling_upgrade and params.dfs_ha_enabled:
            # ZKFC is most likely running because the rolling upgrade
            # triggers the failover itself; if that failed ZKFC may have been
            # killed manually, so make sure it is started.
            safe_zkfc_op(action, env)

        # NOTE(review): format() is called with a template only, so it
        # presumably resolves {placeholders} from local variables and params.
        # Keep the local names `options` and `is_previous_image_dir` as they
        # are - confirm before renaming.
        options = ""
        if express_upgrade:
            # Only logged; the result does not influence the options.
            is_previous_image_dir = is_previous_fs_image()
            Logger.info(
                format(
                    "Previous file system image dir present is {is_previous_image_dir}"
                ))

        if rolling_upgrade or express_upgrade:
            # Both upgrade flavours pick the flag from the upgrade direction.
            if params.upgrade_direction == Direction.UPGRADE:
                options = "-rollingUpgrade started"
            elif params.upgrade_direction == Direction.DOWNGRADE:
                options = "-rollingUpgrade downgrade"

        Logger.info(format("Option for start command: {options}"))

        service(action="start",
                name="namenode",
                user=params.hdfs_user,
                options=options,
                create_pid_dir=True,
                create_log_dir=True)

        if params.security_enabled:
            kinit_cmd = format(
                "{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name}"
            )
            Execute(kinit_cmd, user=params.hdfs_user)

        # Guard handed to the directory-creation helpers below: a command
        # that exits 0 on the active NameNode (HA), or True without HA.
        active_nn_guard = True
        if params.dfs_ha_enabled:
            active_nn_guard = as_user(format(
                "{hdfs_binary} --config {hadoop_conf_dir} haadmin -getServiceState {namenode_id} | grep active"
            ),
                                      params.hdfs_user,
                                      env={
                                          'PATH': params.hadoop_bin_dir
                                      })

        # During a NonRolling upgrade both NameNodes start out down, so there
        # is no point in asking which one is active.
        if express_upgrade:
            active_nn_guard = False

        # ___Scenario___________|_Expected safemode state__|_Wait for safemode OFF____|
        # no-HA                 | ON -> OFF                | Yes                      |
        # HA and active         | ON -> OFF                | Yes                      |
        # HA and standby        | no change                | no check                 |
        # RU with HA on active  | ON -> OFF                | Yes                      |
        # RU with HA on standby | ON -> OFF                | Yes                      |
        # EU with HA on active  | no change                | no check                 |
        # EU with HA on standby | no change                | no check                 |
        # EU non-HA             | no change                | no check                 |
        if not params.dfs_ha_enabled:
            reason = "Must wait to leave safemode since High Availability is not enabled."
            must_check_safemode = True
        elif upgrade_type is not None:
            must_check_safemode = True
            reason = "Must wait to leave safemode since High Availability is enabled during a Stack Upgrade"
        else:
            Logger.info("Wait for NameNode to become active.")
            if is_active_namenode(hdfs_binary):
                must_check_safemode = True
                reason = "Must wait to leave safemode since High Availability is enabled and this is the Active NameNode."
            else:
                must_check_safemode = False
                reason = "Will remain in the current safemode state."

        Logger.info(reason)

        # A NonRolling (Express) upgrade stays in safemode because the
        # DataNodes are still down.
        stay_in_safe_mode = False
        if express_upgrade:
            stay_in_safe_mode = True

        if must_check_safemode:
            Logger.info("Stay in safe mode: {0}".format(stay_in_safe_mode))
            if not stay_in_safe_mode:
                wait_for_safemode_off(hdfs_binary)

        # Always run this on non-HA, or active NameNode during HA.
        create_hdfs_directories(active_nn_guard)
        create_ranger_audit_hdfs_directories(active_nn_guard)
        return

    if action == "stop":
        import params
        service(action="stop", name="namenode", user=params.hdfs_user)
        return

    if action == "status":
        import status_params
        check_process_status(status_params.namenode_pid_file)
        return

    if action == "decommission":
        decommission()
Exemplo n.º 5
0
def namenode(action=None, hdfs_binary=None, do_format=True, upgrade_type=None, env=None):
  """Run one lifecycle operation against the HDFS NameNode.

  Supported values of ``action`` are "configure", "start", "stop", "status"
  and "decommission". Any other non-None value is ignored and the function
  returns None.

  :param action: operation to perform; must not be None.
  :param hdfs_binary: must not be None for "start" and "stop"; passed to
      is_active_namenode() and wait_for_safemode_off() on start.
  :param do_format: when true (and formatting is not disabled through
      ``params.hdfs_namenode_format_disabled``), "start" calls
      format_namenode() first.
  :param upgrade_type: None for a regular start, or "rolling" / "nonrolling"
      while a stack upgrade is in progress.
  :param env: forwarded to safe_zkfc_op() on a rolling upgrade with HA.
  :raises Fail: when ``action`` is None, when ``hdfs_binary`` is missing for
      "start"/"stop", or when the standby bootstrap reports failure.
  """
  if action is None:
    raise Fail('"action" parameter is required for function namenode().')

  requires_binary = action in ("start", "stop")
  if requires_binary and hdfs_binary is None:
    raise Fail('"hdfs_binary" parameter is required for function namenode().')

  if action == "configure":
    import params
    # The name directories have to exist before any other action runs
    # (needed by the manual HA steps for an additional NameNode).
    create_name_dirs(params.dfs_name_dir)
    return

  if action == "start":
    Logger.info("Called service {0} with upgrade_type: {1}".format(action, str(upgrade_type)))
    setup_ranger_hdfs(upgrade_type=upgrade_type)
    import params

    rolling_upgrade = upgrade_type == "rolling"
    express_upgrade = upgrade_type == "nonrolling"

    if do_format and not params.hdfs_namenode_format_disabled:
      format_namenode()

    File(params.exclude_file_path,
         content=Template("exclude_hosts_list.j2"),
         owner=params.hdfs_user,
         group=params.user_group
    )

    # On the standby host of an HA pair the NameNode is bootstrapped first.
    # That needs the active NameNode to be up already, so the helper is
    # expected to retry until a timeout.
    on_standby_host = (params.dfs_ha_enabled
                       and params.dfs_ha_namenode_standby is not None
                       and params.hostname == params.dfs_ha_namenode_standby)
    if on_standby_host:
      if not bootstrap_standby_namenode(params):
        raise Fail("Could not bootstrap standby namenode")

    if rolling_upgrade and params.dfs_ha_enabled:
      # ZKFC is most likely running because the rolling upgrade triggers the
      # failover itself; if that failed ZKFC may have been killed manually,
      # so make sure it is started.
      safe_zkfc_op(action, env)

    # NOTE(review): format() is called with a template only, so it presumably
    # resolves {placeholders} from local variables and params. Keep the local
    # names `options` and `is_previous_image_dir` as they are - confirm
    # before renaming.
    options = ""
    if express_upgrade:
      # Only logged; the result does not influence the options.
      is_previous_image_dir = is_previous_fs_image()
      Logger.info(format("Previous file system image dir present is {is_previous_image_dir}"))

    if rolling_upgrade or express_upgrade:
      # Both upgrade flavours pick the flag from the upgrade direction.
      if params.upgrade_direction == Direction.UPGRADE:
        options = "-rollingUpgrade started"
      elif params.upgrade_direction == Direction.DOWNGRADE:
        options = "-rollingUpgrade downgrade"

    Logger.info(format("Option for start command: {options}"))

    service(
      action="start",
      name="namenode",
      user=params.hdfs_user,
      options=options,
      create_pid_dir=True,
      create_log_dir=True
    )

    if params.security_enabled:
      kinit_cmd = format("{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name}")
      Execute(kinit_cmd, user=params.hdfs_user)

    # Guard handed to the directory-creation helpers below: a command that
    # exits 0 on the active NameNode (HA), or True without HA.
    active_nn_guard = True
    if params.dfs_ha_enabled:
      active_nn_guard = as_user(
        format("{hdfs_binary} --config {hadoop_conf_dir} haadmin -getServiceState {namenode_id} | grep active"),
        params.hdfs_user,
        env={'PATH': params.hadoop_bin_dir})

    # During a NonRolling upgrade both NameNodes start out down, so there is
    # no point in asking which one is active.
    if express_upgrade:
      active_nn_guard = False

    # ___Scenario___________|_Expected safemode state__|_Wait for safemode OFF____|
    # no-HA                 | ON -> OFF                | Yes                      |
    # HA and active         | ON -> OFF                | Yes                      |
    # HA and standby        | no change                | no check                 |
    # RU with HA on active  | ON -> OFF                | Yes                      |
    # RU with HA on standby | ON -> OFF                | Yes                      |
    # EU with HA on active  | no change                | no check                 |
    # EU with HA on standby | no change                | no check                 |
    # EU non-HA             | no change                | no check                 |
    if not params.dfs_ha_enabled:
      reason = "Must wait to leave safemode since High Availability is not enabled."
      must_check_safemode = True
    elif upgrade_type is not None:
      must_check_safemode = True
      reason = "Must wait to leave safemode since High Availability is enabled during a Stack Upgrade"
    else:
      Logger.info("Wait for NameNode to become active.")
      if is_active_namenode(hdfs_binary):
        must_check_safemode = True
        reason = "Must wait to leave safemode since High Availability is enabled and this is the Active NameNode."
      else:
        must_check_safemode = False
        reason = "Will remain in the current safemode state."

    Logger.info(reason)

    # A NonRolling (Express) upgrade stays in safemode because the DataNodes
    # are still down.
    stay_in_safe_mode = False
    if express_upgrade:
      stay_in_safe_mode = True

    if must_check_safemode:
      Logger.info("Stay in safe mode: {0}".format(stay_in_safe_mode))
      if not stay_in_safe_mode:
        wait_for_safemode_off(hdfs_binary)

    # Always run this on non-HA, or active NameNode during HA.
    create_hdfs_directories(active_nn_guard)
    create_ranger_audit_hdfs_directories(active_nn_guard)
    return

  if action == "stop":
    import params
    service(action="stop", name="namenode", user=params.hdfs_user)
    return

  if action == "status":
    import status_params
    check_process_status(status_params.namenode_pid_file)
    return

  if action == "decommission":
    decommission()
Exemplo n.º 6
0
def namenode(action=None, do_format=True, rolling_restart=False, env=None):
    """Run one lifecycle action for the HDFS NameNode.

    Supported values of ``action``:

    * ``"configure"``    -- create the NameNode name directories.
    * ``"start"``        -- optionally format, bootstrap the standby when this
                            host is the configured HA standby, start the
                            daemon, wait for safemode OFF where required and
                            finally create the HDFS directories.
    * ``"stop"``         -- stop the NameNode service.
    * ``"status"``       -- run ``check_process_status`` on the NameNode pid
                            file.
    * ``"decommission"`` -- delegate to ``decommission()``.

    Any other value does nothing.

    :param action: name of the action to perform (see above).
    :param do_format: when true, ``format_namenode()`` is called before start.
    :param rolling_restart: when true, the daemon is started with
        ``-rollingUpgrade started``, ``safe_zkfc_op`` is invoked first, and a
        non-active NameNode also waits for safemode OFF.
    :param env: passed through to ``safe_zkfc_op``.
    :raises Fail: if the standby NameNode cannot be bootstrapped.
    """
    if action == "configure":
        import params
        # The name directories must exist before any other action (the manual
        # HA steps for an additional NameNode rely on them).
        create_name_dirs(params.dfs_name_dir)
        return

    if action == "stop":
        import params
        service(action="stop", name="namenode", user=params.hdfs_user)
        return

    if action == "status":
        import status_params
        check_process_status(status_params.namenode_pid_file)
        return

    if action == "decommission":
        decommission()
        return

    if action != "start":
        return

    # Everything below handles action == "start".
    # Ranger setup is done first, before params is imported.
    setup_ranger_hdfs(rolling_upgrade=rolling_restart)
    import params

    if do_format:
        format_namenode()

    File(params.exclude_file_path,
         content=Template("exclude_hosts_list.j2"),
         owner=params.hdfs_user,
         group=params.user_group)

    if params.dfs_ha_enabled:
        if (params.dfs_ha_namenode_standby is not None
                and params.hostname == params.dfs_ha_namenode_standby):
            # This host is the standby NameNode of an HA deployment: run the
            # bootstrap command so it starts in standby mode. That requires
            # the active NameNode to be up already, so the bootstrap should be
            # retried on failure, up to a timeout.
            if not bootstrap_standby_namenode(params):
                raise Fail("Could not bootstrap standby namenode")

    if rolling_restart:
        options = "-rollingUpgrade started"
        # The Zookeeper Failover Controller on this host may have been killed
        # to initiate the failover, so it has to be started if it exists.
        safe_zkfc_op(action, env)
    else:
        options = ""

    service(
        action="start",
        name="namenode",
        user=params.hdfs_user,
        options=options,
        create_pid_dir=True,
        create_log_dir=True,
    )

    # NOTE(review): format() presumably fills the {placeholders} from params
    # and/or the calling scope -- confirm before renaming or adding locals
    # ahead of the format() calls below.
    if params.security_enabled:
        Execute(
            format("{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name}"),
            user=params.hdfs_user,
        )

    is_namenode_safe_mode_off = format(
        "hdfs dfsadmin -fs {namenode_address} -safemode get | grep 'Safe mode is OFF'")

    if not params.dfs_ha_enabled:
        is_active_namenode_cmd = None
    else:
        is_active_namenode_cmd = as_user(
            format("hdfs --config {hadoop_conf_dir} haadmin -getServiceState {namenode_id} | grep active"),
            params.hdfs_user,
            env={'PATH': params.hadoop_bin_dir},
        )

    # Decide whether to wait for the NameNode to leave safemode.
    #
    # Scenario           | Expected safemode state   | Wait for safemode OFF
    # -------------------+---------------------------+-------------------------
    # 1 (HA and active)  | ON -> OFF                 | yes
    # 2 (HA and standby) | no change (yes during RU) | no check (yes during RU)
    # 3 (no-HA)          | ON -> OFF                 | yes
    if not params.dfs_ha_enabled:
        must_wait = True
        status_msg = "Must wait to leave safemode since High Availability is not enabled."
    else:
        # Exit status 0 means this host is the active NameNode.
        exit_code, _unused_output = shell.call(is_active_namenode_cmd,
                                               logoutput=True)
        if exit_code == 0:
            must_wait = True
            status_msg = "Must wait to leave safemode since High Availability is enabled and this is the Active NameNode."
        elif rolling_restart:
            must_wait = True
            status_msg = "Must wait to leave safemode since High Availability is enabled during a Rolling Upgrade"
        else:
            must_wait = False
            status_msg = "Will remain in the current safemode state."
    Logger.info(status_msg)

    if must_wait:
        Logger.info(
            "Checking the NameNode safemode status since may need to transition from ON to OFF.")
        # Poll the safemode check with 180 tries, 10 seconds apart (up to
        # 30 minutes). A failure is only logged; it does not abort the start.
        try:
            Execute(
                is_namenode_safe_mode_off,
                tries=180,
                try_sleep=10,
                user=params.hdfs_user,
                logoutput=True,
            )
        except Fail:
            Logger.error(
                "NameNode is still in safemode, please be careful with commands that need safemode OFF.")

    # Always run this on non-HA, or on the active NameNode during HA.
    create_hdfs_directories(is_active_namenode_cmd)
Exemplo n.º 7
0
def namenode(action=None, do_format=True, upgrade_type=None, env=None):
  """
  Run one lifecycle action for the HDFS NameNode.

  Handled values of ``action`` are "configure", "start", "stop" and
  "decommission"; any other value does nothing.

  :param action: the action to perform.
  :param do_format: when true, format_namenode() is called before starting.
  :param upgrade_type: None for a normal start, "rolling" for a Rolling
    Upgrade (RU), or "nonrolling" for an Express Upgrade (EU).
  :param env: passed through to safe_zkfc_op() during a rolling upgrade.
  :raises Fail: if this host is the HA standby and it cannot be bootstrapped.
  """
  import params

  # The name directories must be present before any other action (the manual
  # HA steps for an additional NameNode rely on them).
  if action == "configure":
    create_name_dirs(params.dfs_name_dir)

  if action == "start":
    if do_format:
      format_namenode()

    File(params.exclude_file_path,
         content=Template("exclude_hosts_list.j2"),
         owner=params.hdfs_user,
         group=params.user_group
    )

    # 0o755 is the octal literal spelling accepted by Python 2.6+ and Python 3.
    Directory(params.hadoop_pid_dir_prefix,
              mode=0o755,
              owner=params.hdfs_user,
              group=params.user_group
    )

    if params.dfs_ha_enabled and \
      params.dfs_ha_namenode_standby is not None and \
      params.hostname == params.dfs_ha_namenode_standby:
        # if the current host is the standby NameNode in an HA deployment
        # run the bootstrap command, to start the NameNode in standby mode
        # this requires that the active NameNode is already up and running,
        # so this execute should be re-tried upon failure, up to a timeout
        success = bootstrap_standby_namenode(params)
        if not success:
          raise Fail("Could not bootstrap standby namenode")

    if upgrade_type == "rolling":
      # Must start Zookeeper Failover Controller if it exists on this host because it could have been killed in order to initiate the failover.
      safe_zkfc_op(action, env)

    # Pick the NameNode start option from the upgrade type and direction.
    options = ""
    if upgrade_type == "rolling":
      if params.upgrade_direction == Direction.UPGRADE:
        options = "-rollingUpgrade started"
      elif params.upgrade_direction == Direction.DOWNGRADE:
        options = "-rollingUpgrade downgrade"
    elif upgrade_type == "nonrolling":
      # The previous-image check result is only logged; it does not influence
      # the option chosen below.
      is_previous_image_dir = is_previous_fs_image()
      Logger.info(format("Previous file system image dir present is {is_previous_image_dir}"))

      if params.upgrade_direction == Direction.UPGRADE:
        options = "-rollingUpgrade started"
      elif params.upgrade_direction == Direction.DOWNGRADE:
        options = "-rollingUpgrade downgrade"

    Logger.info(format("Option for start command: {options}"))

    service(
      action="start",
      name="namenode",
      user=params.hdfs_user,
      options=options,
      create_pid_dir=True,
      create_log_dir=True
    )


    if params.security_enabled:
      Execute(format("{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name}"),
              user = params.hdfs_user)

    # Value handed to create_hdfs_directories() at the end: a shell command
    # that greps for the "active" state under HA, True without HA, and False
    # during a NonRolling upgrade.
    # NOTE(review): presumably used as a guard condition there -- confirm.
    if params.dfs_ha_enabled:
      is_active_namenode_cmd = as_user(format("hdfs --config {hadoop_conf_dir} haadmin -getServiceState {namenode_id} | grep active"), params.hdfs_user, env={'PATH':params.hadoop_bin_dir})

    else:
      is_active_namenode_cmd = True

    # During NonRolling Upgrade, both NameNodes are initially down,
    # so no point in checking if this is the active or standby.
    if upgrade_type == "nonrolling":
      is_active_namenode_cmd = False

    # ___Scenario___________|_Expected safemode state__|_Wait for safemode OFF____|
    # no-HA                 | ON -> OFF                | Yes                      |
    # HA and active         | ON -> OFF                | Yes                      |
    # HA and standby        | no change                | no check                 |
    # RU with HA on active  | ON -> OFF                | Yes                      |
    # RU with HA on standby | ON -> OFF                | Yes                      |
    # EU with HA on active  | no change                | no check                 |
    # EU with HA on standby | no change                | no check                 |
    # EU non-HA             | no change                | no check                 |

    check_for_safemode_off = False
    msg = ""
    if params.dfs_ha_enabled:
      if upgrade_type is not None:
        check_for_safemode_off = True
        msg = "Must wait to leave safemode since High Availability is enabled during a Stack Upgrade"
      else:
        Logger.info("Wait for NameNode to become active.")
        if is_active_namenode(): # active
          check_for_safemode_off = True
          msg = "Must wait to leave safemode since High Availability is enabled and this is the Active NameNode."
        else:
          msg = "Will remain in the current safemode state."
    else:
      msg = "Must wait to leave safemode since High Availability is not enabled."
      check_for_safemode_off = True

    Logger.info(msg)

    # During a NonRolling (aka Express Upgrade), stay in safemode since the DataNodes are down.
    stay_in_safe_mode = False
    if upgrade_type == "nonrolling":
      stay_in_safe_mode = True

    if check_for_safemode_off:
      Logger.info("Stay in safe mode: {0}".format(stay_in_safe_mode))
      if not stay_in_safe_mode:
        wait_for_safemode_off()

    # Always run this on non-HA, or active NameNode during HA.
    create_hdfs_directories(is_active_namenode_cmd)

  if action == "stop":
    service(
      action="stop", name="namenode",
      user=params.hdfs_user
    )

  if action == "decommission":
    decommission()
Exemplo n.º 8
0
def namenode(action=None, do_format=True, rolling_restart=False, env=None):
  """
  Run one lifecycle action for the HDFS NameNode.

  Handled values of ``action`` are "configure", "start", "stop" and
  "decommission"; any other value does nothing.

  :param action: the action to perform.
  :param do_format: when true, format_namenode() is called before starting.
  :param rolling_restart: when true, the daemon is started with
    "-rollingUpgrade started" and safe_zkfc_op() is invoked first.
  :param env: passed through to safe_zkfc_op().
  :raises Fail: if this host is the HA standby and it cannot be bootstrapped.
  """
  import params

  # The name directories must be present before any other action (the manual
  # HA steps for an additional NameNode rely on them).
  if action == "configure":
    create_name_dirs(params.dfs_name_dir)

  if action == "start":
    if do_format:
      format_namenode()

    File(params.exclude_file_path,
         content=Template("exclude_hosts_list.j2"),
         owner=params.hdfs_user,
         group=params.user_group
    )

    # 0o755 is the octal literal spelling accepted by Python 2.6+ and Python 3.
    Directory(params.hadoop_pid_dir_prefix,
              mode=0o755,
              owner=params.hdfs_user,
              group=params.user_group
    )

    if params.dfs_ha_enabled and \
      params.dfs_ha_namenode_standby is not None and \
      params.hostname == params.dfs_ha_namenode_standby:
        # if the current host is the standby NameNode in an HA deployment
        # run the bootstrap command, to start the NameNode in standby mode
        # this requires that the active NameNode is already up and running,
        # so this execute should be re-tried upon failure, up to a timeout
        success = bootstrap_standby_namenode(params)
        if not success:
          raise Fail("Could not bootstrap standby namenode")

    options = "-rollingUpgrade started" if rolling_restart else ""

    if rolling_restart:
      # Must start Zookeeper Failover Controller if it exists on this host because it could have been killed in order to initiate the failover.
      safe_zkfc_op(action, env)

    service(
      action="start",
      name="namenode",
      user=params.hdfs_user,
      options=options,
      create_pid_dir=True,
      create_log_dir=True
    )


    if params.security_enabled:
      Execute(format("{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name}"),
              user = params.hdfs_user)

    # Under HA this is a shell command that greps for the "active" state;
    # without HA it is None.
    if params.dfs_ha_enabled:
      dfs_check_nn_status_cmd = as_user(format("hdfs --config {hadoop_conf_dir} haadmin -getServiceState {namenode_id} | grep active"), params.hdfs_user, env={'PATH':params.hadoop_bin_dir})
    else:
      dfs_check_nn_status_cmd = None

    namenode_safe_mode_off = format("hadoop dfsadmin -safemode get | grep 'Safe mode is OFF'")

    # If HA is enabled and it is in standby, then stay in safemode, otherwise, leave safemode.
    leave_safe_mode = True
    if dfs_check_nn_status_cmd is not None:
      code, out = shell.call(dfs_check_nn_status_cmd) # If active NN, code will be 0
      if code != 0:
        leave_safe_mode = False

    if leave_safe_mode:
      # First check if Namenode is not in 'safemode OFF' (equivalent to safemode ON), if so, then leave it
      code, out = shell.call(namenode_safe_mode_off)
      if code != 0:
        leave_safe_mode_cmd = format("hdfs --config {hadoop_conf_dir} dfsadmin -safemode leave")
        try:
          Execute(leave_safe_mode_cmd,
                  user=params.hdfs_user,
                  path=[params.hadoop_bin_dir],
          )
        except Exception:
          # Best-effort retry: wait a minute and try exactly once more; a
          # second failure propagates to the caller. Catching Exception
          # (rather than a bare except) lets KeyboardInterrupt and SystemExit
          # abort immediately instead of sleeping and retrying.
          time.sleep(60)
          Execute(leave_safe_mode_cmd,
                  user=params.hdfs_user,
                  path=[params.hadoop_bin_dir],
          )

    # Verify if Namenode should be in safemode OFF
    Execute(namenode_safe_mode_off,
            tries=40,
            try_sleep=10,
            path=[params.hadoop_bin_dir],
            user=params.hdfs_user,
            only_if=dfs_check_nn_status_cmd #skip when HA not active
    )
    create_hdfs_directories(dfs_check_nn_status_cmd)

  if action == "stop":
    service(
      action="stop", name="namenode",
      user=params.hdfs_user
    )

  if action == "decommission":
    decommission()