Example #1
def install_tez_jars():
  import params
  
  destination_hdfs_dirs = get_tez_hdfs_dir_paths(params.tez_lib_uris)

  # If tez libraries are to be stored in hdfs
  if destination_hdfs_dirs:
    for hdfs_dir in destination_hdfs_dirs:
      params.HdfsDirectory(hdfs_dir,
                          action="create_delayed",
                          owner=params.tez_user,
                          mode=0755
      )
    pass
    params.HdfsDirectory(None, action="create")

    if params.security_enabled:
      kinit_if_needed = format("{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name};")
    else:
      kinit_if_needed = ""

    if kinit_if_needed:
      Execute(kinit_if_needed,
              user=params.tez_user,
              path='/bin'
      )
    pass

    app_dir_path = None
    lib_dir_path = None

    if len(destination_hdfs_dirs) > 1:
      for path in destination_hdfs_dirs:
        if 'lib' in path:
          lib_dir_path = path
        else:
          app_dir_path = path
        pass
      pass
    pass

    if app_dir_path:
      CopyFromLocal(params.tez_local_api_jars,
                    mode=0755,
                    owner=params.tez_user,
                    dest_dir=app_dir_path,
                    kinnit_if_needed=kinit_if_needed,
                    hdfs_user=params.hdfs_user
      )
    pass

    if lib_dir_path:
      CopyFromLocal(params.tez_local_lib_jars,
                    mode=0755,
                    owner=params.tez_user,
                    dest_dir=lib_dir_path,
                    kinnit_if_needed=kinit_if_needed,
                    hdfs_user=params.hdfs_user
      )
    pass
Example #2
def create_hdfs_directories():
    import params

    params.HdfsDirectory("/tmp",
                         action="create_delayed",
                         owner=params.hdfs_user,
                         mode=0777)
    params.HdfsDirectory(params.smoke_hdfs_user_dir,
                         action="create_delayed",
                         owner=params.smoke_user,
                         mode=params.smoke_hdfs_user_mode)
    params.HdfsDirectory(None, action="create")
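A pattern worth calling out across these examples: HdfsDirectory calls with action="create_delayed" only queue directories, and the trailing HdfsDirectory(None, action="create") flushes the whole queue at once, so several directories are provisioned in a single round of hadoop fs calls. Below is a minimal, self-contained sketch of that queue-and-flush idea; the class name DelayedHdfsDirectory and its internals are hypothetical stand-ins, not Ambari's actual resource implementation.

# Minimal sketch of the queue-and-flush idiom (hypothetical class, not
# Ambari's actual HdfsDirectory implementation).
class DelayedHdfsDirectory(object):
    _pending = []  # shared queue of (path, owner, mode) tuples

    def __init__(self, path, action, owner=None, mode=None):
        if action == "create_delayed":
            self._pending.append((path, owner, mode))
        elif action == "create":
            self._flush()

    @classmethod
    def _flush(cls):
        if not cls._pending:
            return
        # One mkdir invocation covers every queued directory.
        print("hadoop fs -mkdir -p " + " ".join(p for p, _, _ in cls._pending))
        for path, owner, mode in cls._pending:
            if owner is not None:
                print("hadoop fs -chown %s %s" % (owner, path))
            if mode is not None:
                print("hadoop fs -chmod %o %s" % (mode, path))
        del cls._pending[:]

# Queue two directories, then flush them in one pass:
DelayedHdfsDirectory("/tmp", action="create_delayed", owner="hdfs", mode=0o777)
DelayedHdfsDirectory("/user/ambari-qa", action="create_delayed", owner="ambari-qa")
DelayedHdfsDirectory(None, action="create")

Example #7 below applies the same idea with "create_on_execute" calls flushed by HdfsDirectory(None, action="execute").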
Example #3
def falcon(type, action=None):
    import params
    if action == 'config':
        Directory(params.falcon_pid_dir, owner=params.falcon_user)
        Directory(params.falcon_log_dir, owner=params.falcon_user)
        Directory(params.falcon_webapp_dir, owner=params.falcon_user)
        Directory(params.falcon_home, owner=params.falcon_user)
        Directory(params.falcon_conf_dir,
                  owner=params.falcon_user,
                  recursive=True)
        File(params.falcon_conf_dir + '/falcon-env.sh',
             content=InlineTemplate(params.falcon_env_sh_template))
        File(params.falcon_conf_dir + '/client.properties',
             content=Template('client.properties.j2'),
             mode=0644)
        PropertiesFile(params.falcon_conf_dir + '/runtime.properties',
                       properties=params.falcon_runtime_properties,
                       mode=0644)
        PropertiesFile(params.falcon_conf_dir + '/startup.properties',
                       properties=params.falcon_startup_properties,
                       mode=0644)
    if type == 'server':
        if action == 'config':
            if params.store_uri[0:4] == "hdfs":
                params.HdfsDirectory(params.store_uri,
                                     action="create_delayed",
                                     owner=params.falcon_user,
                                     mode=0755)
            params.HdfsDirectory(
                params.flacon_apps_dir,
                action="create_delayed",
                owner=params.falcon_user,
                mode=0777  #TODO change to proper mode
            )
            params.HdfsDirectory(None, action="create")
            Directory(params.falcon_local_dir,
                      owner=params.falcon_user,
                      recursive=True)
            if params.falcon_embeddedmq_enabled == True:
                Directory(params.falcon_embeddedmq_data,
                          owner=params.falcon_user,
                          recursive=True)

        if action == 'start':
            Execute(
                format('{falcon_home}/bin/falcon-start -port {falcon_port}'),
                user=params.falcon_user,
                path=params.hadoop_bin_dir)
        if action == 'stop':
            Execute(format('{falcon_home}/bin/falcon-stop'),
                    user=params.falcon_user,
                    path=params.hadoop_bin_dir)
            File(params.server_pid_file, action='delete')
Example #4
def falcon(type, action=None):
    import params
    Directory(params.falcon_pid_dir, owner=params.falcon_user)
    Directory(params.falcon_log_dir, owner=params.falcon_user)
    Directory(params.falcon_webapp_dir, owner=params.falcon_user)
    if type == 'client':
        if action == 'config':
            File(params.falcon_conf_dir + '/client.properties',
                 content=Template('client.properties.j2'),
                 mode=0644)
    elif type == 'server':
        if action == 'config':
            if params.store_uri[0:4] == "hdfs":
                params.HdfsDirectory(params.store_uri,
                                     action="create_delayed",
                                     owner=params.falcon_user,
                                     mode=0755)
            params.HdfsDirectory(
                params.flacon_apps_dir,
                action="create_delayed",
                owner=params.falcon_user,
                mode=0777  #TODO change to proper mode
            )
            params.HdfsDirectory(None, action="create")
            Directory(params.falcon_local_dir,
                      owner=params.falcon_user,
                      recursive=True)
            Directory(params.falcon_data_dir,
                      owner=params.falcon_user,
                      recursive=True)
            PropertiesFile(params.falcon_conf_dir + '/runtime.properties',
                           properties=params.falcon_runtime_properties,
                           mode=0644)
            PropertiesFile(params.falcon_conf_dir + '/startup.properties',
                           properties=params.falcon_startup_properties,
                           mode=0644)
        if action == 'start':
            Execute(format(
                'env JAVA_HOME={java_home} FALCON_LOG_DIR={falcon_log_dir} '
                'FALCON_PID_DIR=/var/run/falcon FALCON_DATA_DIR={falcon_data_dir} '
                '{falcon_home}/bin/falcon-start -port {falcon_port}'),
                    user=params.falcon_user)
        if action == 'stop':
            Execute(format(
                'env JAVA_HOME={java_home} FALCON_LOG_DIR={falcon_log_dir} '
                'FALCON_PID_DIR=/var/run/falcon FALCON_DATA_DIR={falcon_data_dir} '
                '{falcon_home}/bin/falcon-stop'),
                    user=params.falcon_user)
            File(params.server_pid_file, action='delete')
Example #5
    def install_hive_exec_jar(self, params):
        hdfs_path_prefix = 'hdfs://'
        if params.tez_lib_uris:
            hdfs_path = params.hive_exec_hdfs_path

            if hdfs_path.startswith(hdfs_path_prefix):
                hdfs_path = hdfs_path[len(hdfs_path_prefix):]
            pass

            params.HdfsDirectory(hdfs_path,
                                 action="create",
                                 owner=params.hive_user,
                                 mode=0755)

            if params.security_enabled:
                kinit_if_needed = format(
                    "{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_user};")
            else:
                kinit_if_needed = ""

            if kinit_if_needed:
                Execute(kinit_if_needed, user=params.tez_user, path='/bin')

            CopyFromLocal(params.hive_exec_jar_path,
                          mode=0655,
                          owner=params.hive_user,
                          dest_dir=hdfs_path,
                          kinnit_if_needed=kinit_if_needed,
                          hdfs_user=params.hdfs_user)
        pass
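For clarity, the prefix handling above can be exercised on its own; the sample URI below is illustrative only:

# Self-contained check of the hdfs:// prefix stripping (sample value only):
hdfs_path_prefix = 'hdfs://'
hdfs_path = 'hdfs://namenode:8020/apps/hive/install/hive-exec-0.13.0.jar'
if hdfs_path.startswith(hdfs_path_prefix):
    hdfs_path = hdfs_path[len(hdfs_path_prefix):]
assert hdfs_path == 'namenode:8020/apps/hive/install/hive-exec-0.13.0.jar'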
Example #6
def create_hdfs_directories(check):
    import params

    params.HdfsDirectory("/tmp",
                         action="create_delayed",
                         owner=params.hdfs_user,
                         mode=0777)
    params.HdfsDirectory(params.smoke_hdfs_user_dir,
                         action="create_delayed",
                         owner=params.smoke_user,
                         mode=params.smoke_hdfs_user_mode)
    params.HdfsDirectory(
        None,
        action="create",
        only_if=check  #skip creation when HA not active
    )
Example #7
    def start(self, env):
        import params
        self.configure(env)

        # Check HDFS set up
        # Must be in start section, since we need HDFS running
        params.HdfsDirectory(
            "/hbase/archive",
            action="create_on_execute",
            owner=params.hbase_user,
            group=params.hbase_user,
        )
        params.HdfsDirectory(
            params.hbase_staging,
            action="create_on_execute",
            owner=params.hbase_user,
            group=params.hbase_user,
        )
        params.HdfsDirectory(
            "/user/trafodion",
            action="create_on_execute",
            owner=params.traf_user,
            group=params.traf_group,
            mode=0755,
        )
        params.HdfsDirectory(
            "/user/trafodion/trafodion_backups",
            action="create_on_execute",
            owner=params.traf_user,
            group=params.traf_group,
        )
        params.HdfsDirectory(
            "/user/trafodion/bulkload",
            action="create_on_execute",
            owner=params.traf_user,
            group=params.user_group,
            mode=0750,
        )
        params.HdfsDirectory(
            "/user/trafodion/lobs",
            action="create_on_execute",
            owner=params.traf_user,
            group=params.traf_group,
        )
        params.HdfsDirectory(None, action="execute")

        try:
            cmd = "hdfs dfs -setfacl -R -m user:%s:rwx,default:user:%s:rwx,mask::rwx /hbase/archive" % \
                     (params.traf_user, params.traf_user)
            Execute(cmd, user=params.hdfs_user)
        except:
            print "Error: HDFS ACLs must be enabled for config of hdfs:/hbase/archive"
            print "       Re-start HDFS, HBase, and other affected components before starting Trafodion"
            raise Fail("Need HDFS component re-start")

        # Start trafodion
        Execute('source ~/.bashrc ; sqstart',
                user=params.traf_user,
                logoutput=True)
Example #8
def oozie(
        is_server=False  # TODO: see if we can remove this
):
    import params

    if is_server:
        params.HdfsDirectory(params.oozie_hdfs_user_dir,
                             action="create",
                             owner=params.oozie_user,
                             mode=params.oozie_hdfs_user_mode)
    #TODO hack for falcon el
    oozie_site = dict(params.config['configurations']['oozie-site'])
    oozie_site["oozie.services.ext"] = (
        'org.apache.oozie.service.JMSAccessorService,' +
        oozie_site["oozie.services.ext"])
    XmlConfig("oozie-site.xml",
              conf_dir=params.conf_dir,
              configurations=oozie_site,
              owner=params.oozie_user,
              group=params.user_group,
              mode=0664)
    Directory(params.conf_dir,
              owner=params.oozie_user,
              group=params.user_group)

    TemplateConfig(format("{conf_dir}/oozie-env.sh"), owner=params.oozie_user)

    if (params.log4j_props != None):
        File(format("{params.conf_dir}/oozie-log4j.properties"),
             mode=0644,
             group=params.user_group,
             owner=params.oozie_user,
             content=params.log4j_props)
    elif (os.path.exists(format("{params.conf_dir}/oozie-log4j.properties"))):
        File(format("{params.conf_dir}/oozie-log4j.properties"),
             mode=0644,
             group=params.user_group,
             owner=params.oozie_user)

    environment = {"no_proxy": format("{ambari_server_hostname}")}

    if params.jdbc_driver_name == "com.mysql.jdbc.Driver" or \
       params.jdbc_driver_name == "org.postgresql.Driver" or \
       params.jdbc_driver_name == "oracle.jdbc.driver.OracleDriver":
        Execute(format("/bin/sh -c 'cd /usr/lib/ambari-agent/ &&\
    curl -kf \
    --retry 5 {jdk_location}{check_db_connection_jar_name}\
     -o {check_db_connection_jar_name}'"),
                not_if=format("[ -f {check_db_connection_jar} ]"),
                environment=environment)

    oozie_ownership()

    if is_server:
        oozie_server_specific()
Example #9
def oozie(is_server=False):
    import params

    if is_server:
        params.HdfsDirectory(params.oozie_hdfs_user_dir,
                             action="create",
                             owner=params.oozie_user,
                             mode=params.oozie_hdfs_user_mode)
    XmlConfig(
        "oozie-site.xml",
        conf_dir=params.conf_dir,
        configurations=params.config['configurations']['oozie-site'],
        configuration_attributes=params.config['configuration_attributes']
        ['oozie-site'],
        owner=params.oozie_user,
        group=params.user_group,
        mode=0664)

    Directory(params.conf_dir,
              owner=params.oozie_user,
              group=params.user_group)

    File(format("{conf_dir}/oozie-env.sh"),
         owner=params.oozie_user,
         content=InlineTemplate(params.oozie_env_sh_template))

    if (params.log4j_props != None):
        File(format("{params.conf_dir}/oozie-log4j.properties"),
             mode=0644,
             group=params.user_group,
             owner=params.oozie_user,
             content=params.log4j_props)
    elif (os.path.exists(format("{params.conf_dir}/oozie-log4j.properties"))):
        File(format("{params.conf_dir}/oozie-log4j.properties"),
             mode=0644,
             group=params.user_group,
             owner=params.oozie_user)

    environment = {"no_proxy": format("{ambari_server_hostname}")}

    if params.jdbc_driver_name == "com.mysql.jdbc.Driver" or \
       params.jdbc_driver_name == "org.postgresql.Driver" or \
       params.jdbc_driver_name == "oracle.jdbc.driver.OracleDriver":
        Execute(format("/bin/sh -c 'cd /usr/lib/ambari-agent/ &&\
    curl -kf -x \"\" \
    --retry 5 {jdk_location}{check_db_connection_jar_name}\
     -o {check_db_connection_jar_name}'"),
                not_if=format("[ -f {check_db_connection_jar} ]"),
                environment=environment)

    oozie_ownership()

    if is_server:
        oozie_server_specific()
Example #10
def mapreduce(name=None):
    import params

    if name in ["jobtracker", "historyserver"]:
        params.HdfsDirectory("/mapred",
                             action="create_delayed",
                             owner=params.mapred_user)
        params.HdfsDirectory("/mapred/system",
                             action="create_delayed",
                             owner=params.mapred_user)
        params.HdfsDirectory("/mapred/history",
                             action="create_delayed",
                             owner=params.mapred_user)
        params.HdfsDirectory(params.mapreduce_jobhistory_intermediate_done_dir,
                             action="create_delayed",
                             owner=params.mapred_user,
                             group=params.user_group,
                             mode=0777)
        params.HdfsDirectory(params.mapreduce_jobhistory_done_dir,
                             action="create_delayed",
                             owner=params.mapred_user,
                             group=params.user_group,
                             mode=0777)
        params.HdfsDirectory(None, action="create")

    Directory(params.mapred_pid_dir,
              owner=params.mapred_user,
              group=params.user_group,
              recursive=True)

    mapred_log_dir = os.path.join(params.mapred_log_dir_prefix,
                                  params.mapred_user)
    Directory(mapred_log_dir,
              recursive=True,
              owner=params.mapred_user,
              group=params.user_group)

    if name == 'jobtracker':
        File(os.path.join(mapred_log_dir, 'hadoop-mapreduce.jobsummary.log'),
             owner=params.mapred_user,
             group=params.user_group,
             mode=0664)

    Directory(params.mapred_local_dir.split(','),
              owner=params.mapred_user,
              mode=0755,
              recursive=True,
              ignore_failures=True)

    File(
        params.exclude_file_path,
        owner=params.mapred_user,
        group=params.user_group,
    )

    File(
        params.mapred_hosts_file_path,
        owner=params.mapred_user,
        group=params.user_group,
    )
Example #11
def setup_spark(env, type, action=None):
    import params

    env.set_params(params)

    Directory([params.spark_pid_dir, params.spark_log_dir],
              owner=params.spark_user,
              group=params.user_group,
              recursive=True)
    if type == 'server':
        if action == 'start' or action == 'config':
            params.HdfsDirectory(params.spark_hdfs_user_dir,
                                 action="create",
                                 owner=params.spark_user,
                                 mode=0775)

    file_path = params.spark_conf + '/spark-defaults.conf'
    create_file(file_path)

    write_properties_to_file(file_path, spark_properties(params))

    # create spark-env.sh in etc/conf dir
    File(os.path.join(params.spark_conf, 'spark-env.sh'),
         owner=params.spark_user,
         group=params.spark_group,
         content=InlineTemplate(params.spark_env_sh))

    #create log4j.properties in etc/conf dir
    File(os.path.join(params.spark_conf, 'log4j.properties'),
         owner=params.spark_user,
         group=params.spark_group,
         content=params.spark_log4j_properties)

    #create metrics.properties in etc/conf dir
    File(os.path.join(params.spark_conf, 'metrics.properties'),
         owner=params.spark_user,
         group=params.spark_group,
         content=InlineTemplate(params.spark_metrics_properties))

    File(os.path.join(params.spark_conf, 'java-opts'),
         owner=params.spark_user,
         group=params.spark_group,
         content=params.spark_javaopts_properties)

    if params.is_hive_installed:
        hive_config = get_hive_config()
        XmlConfig("hive-site.xml",
                  conf_dir=params.spark_conf,
                  configurations=hive_config,
                  owner=params.spark_user,
                  group=params.spark_group,
                  mode=0644)
Example #12
def setup_custom_scratchdir():
    import params
    # If this property is custom and not a variation of the writable temp dir
    if not is_empty(params.hive_exec_scratchdir) \
            and not params.hive_exec_scratchdir.startswith("/tmp"):
        # Hive expects this dir to be writeable by everyone as it is used as a temp dir
        params.HdfsDirectory(params.hive_exec_scratchdir,
                             action="create_delayed",
                             owner=params.hive_user,
                             group=params.hdfs_user,
                             mode=0777)
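The guard in Example #12 reads better as a small predicate. The sketch below is self-contained: is_empty here is a stand-in for the resource_management helper of the same name, and needs_custom_scratchdir is a hypothetical name introduced for illustration.

def is_empty(value):
    # Stand-in for resource_management's is_empty helper.
    return value is None or value == ""

def needs_custom_scratchdir(scratchdir):
    # Provision only when the property is set and is not a variation of /tmp.
    return not is_empty(scratchdir) and not scratchdir.startswith("/tmp")

assert needs_custom_scratchdir("/apps/hive/scratch")
assert not needs_custom_scratchdir("/tmp/hive")
assert not needs_custom_scratchdir("")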
Example #13
def setup_spark(env, type, action=None):
    import params

    Directory([params.spark_pid_dir, params.spark_log_dir],
              owner=params.spark_user,
              group=params.user_group,
              recursive=True)
    if type == 'server' and action == 'config':
        params.HdfsDirectory(params.spark_hdfs_user_dir,
                             action="create",
                             owner=params.spark_user,
                             mode=0775)

    PropertiesFile(
        format("{spark_conf}/spark-defaults.conf"),
        properties=params.config['configurations']['spark-defaults'],
        key_value_delimiter=" ",
    )

    # create spark-env.sh in etc/conf dir
    File(os.path.join(params.spark_conf, 'spark-env.sh'),
         owner=params.spark_user,
         group=params.spark_group,
         content=InlineTemplate(params.spark_env_sh))

    #create log4j.properties in etc/conf dir
    File(os.path.join(params.spark_conf, 'log4j.properties'),
         owner=params.spark_user,
         group=params.spark_group,
         content=params.spark_log4j_properties)

    #create metrics.properties in etc/conf dir
    File(os.path.join(params.spark_conf, 'metrics.properties'),
         owner=params.spark_user,
         group=params.spark_group,
         content=InlineTemplate(params.spark_metrics_properties))

    File(os.path.join(params.spark_conf, 'java-opts'),
         owner=params.spark_user,
         group=params.spark_group,
         content=params.spark_javaopts_properties)

    if params.is_hive_installed:
        XmlConfig("hive-site.xml",
                  conf_dir=params.spark_conf,
                  configurations=params.spark_hive_properties,
                  owner=params.spark_user,
                  group=params.spark_group,
                  mode=0644)
Example #14
def _copy_files(source_and_dest_pairs, file_owner, group_owner, kinit_if_needed):
  """
  :param source_and_dest_pairs: List of tuples (x, y), where x is the source file in the local file system,
  and y is the destination file path in HDFS
  :param file_owner: Owner to set for the file copied to HDFS (typically hdfs account)
  :param group_owner: Owning group to set for the file copied to HDFS (typically hadoop group)
  :param kinit_if_needed: kinit command if it is needed, otherwise an empty string
  :return: Returns 0 if at least one file was copied and no exceptions occurred, and 1 otherwise.

  Must kinit before calling this function.
  """
  import params

  return_value = 1
  if source_and_dest_pairs and len(source_and_dest_pairs) > 0:
    return_value = 0
    for (source, destination) in source_and_dest_pairs:
      try:
        destination_dir = os.path.dirname(destination)

        params.HdfsDirectory(destination_dir,
                             action="create",
                             owner=file_owner,
                             mode=0555
        )

        CopyFromLocal(source,
                      mode=0444,
                      owner=file_owner,
                      group=group_owner,
                      dest_dir=destination_dir,
                      kinnit_if_needed=kinit_if_needed,
                      hdfs_user=params.hdfs_user,
                      hadoop_bin_dir=params.hadoop_bin_dir,
                      hadoop_conf_dir=params.hadoop_conf_dir
        )
      except:
        return_value = 1
  return return_value
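A hedged usage sketch for _copy_files above; the jar paths and the "hdfs"/"hadoop" account names below are illustrative, not values taken from params.

# Illustrative call only -- source/destination pairs and user names are made up.
pairs = [
    ("/usr/lib/tez/tez-api.jar", "/apps/tez/tez-api.jar"),
    ("/usr/lib/tez/lib/tez-common.jar", "/apps/tez/lib/tez-common.jar"),
]
# kinit must already have run when security is enabled (see the docstring).
if _copy_files(pairs, file_owner="hdfs", group_owner="hadoop",
               kinit_if_needed="") != 0:
    raise Fail("at least one copy to HDFS failed")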
Example #15
def mapreduce(name=None):
    import params

    if name in ["jobtracker", "historyserver"]:
        params.HdfsDirectory("/mapred",
                             action="create_delayed",
                             owner=params.mapred_user)
        params.HdfsDirectory("/mapred/system",
                             action="create_delayed",
                             owner=params.mapred_user)
        params.HdfsDirectory("/mapred/history",
                             action="create_delayed",
                             owner=params.mapred_user)
        params.HdfsDirectory(params.mapreduce_jobhistory_intermediate_done_dir,
                             action="create_delayed",
                             owner=params.mapred_user,
                             group=params.user_group,
                             mode=0777)
        params.HdfsDirectory(params.mapreduce_jobhistory_done_dir,
                             action="create_delayed",
                             owner=params.mapred_user,
                             group=params.user_group,
                             mode=0777)
        params.HdfsDirectory(None, action="create")

    Directory([params.mapred_pid_dir, params.mapred_log_dir],
              owner=params.mapred_user,
              group=params.user_group,
              recursive=True)

    Directory(params.mapred_local_dir.split(','),
              owner=params.mapred_user,
              mode=0755,
              recursive=True)

    File(
        params.exclude_file_path,
        owner=params.mapred_user,
        group=params.user_group,
    )

    File(
        params.mapred_hosts_file_path,
        owner=params.mapred_user,
        group=params.user_group,
    )
Example #16
def oozie(is_server=False):
    import params

    if is_server:
        params.HdfsDirectory(params.oozie_hdfs_user_dir,
                             action="create",
                             owner=params.oozie_user,
                             mode=params.oozie_hdfs_user_mode)
    Directory(params.conf_dir,
              recursive=True,
              owner=params.oozie_user,
              group=params.user_group)
    XmlConfig(
        "oozie-site.xml",
        conf_dir=params.conf_dir,
        configurations=params.oozie_site,
        configuration_attributes=params.config['configuration_attributes']
        ['oozie-site'],
        owner=params.oozie_user,
        group=params.user_group,
        mode=0664)
    File(format("{conf_dir}/oozie-env.sh"),
         owner=params.oozie_user,
         content=InlineTemplate(params.oozie_env_sh_template))

    if (params.log4j_props != None):
        File(format("{params.conf_dir}/oozie-log4j.properties"),
             mode=0644,
             group=params.user_group,
             owner=params.oozie_user,
             content=params.log4j_props)
    elif (os.path.exists(format("{params.conf_dir}/oozie-log4j.properties"))):
        File(format("{params.conf_dir}/oozie-log4j.properties"),
             mode=0644,
             group=params.user_group,
             owner=params.oozie_user)

    if params.hdp_stack_version != "" and compare_versions(
            params.hdp_stack_version, '2.2') >= 0:
        File(format("{params.conf_dir}/adminusers.txt"),
             mode=0644,
             group=params.user_group,
             owner=params.oozie_user,
             content=Template('adminusers.txt.j2',
                              oozie_user=params.oozie_user))
    else:
        File(format("{params.conf_dir}/adminusers.txt"),
             owner=params.oozie_user,
             group=params.user_group)

    if params.jdbc_driver_name == "com.mysql.jdbc.Driver" or \
       params.jdbc_driver_name == "com.microsoft.sqlserver.jdbc.SQLServerDriver" or \
       params.jdbc_driver_name == "org.postgresql.Driver" or \
       params.jdbc_driver_name == "oracle.jdbc.driver.OracleDriver":
        File(
            format("/usr/lib/ambari-agent/{check_db_connection_jar_name}"),
            content=DownloadSource(
                format("{jdk_location}{check_db_connection_jar_name}")),
        )
    pass

    oozie_ownership()

    if is_server:
        oozie_server_specific()
Example #17
def yarn(name=None):
  import params


  if name in ["nodemanager","historyserver"]:
    if params.yarn_log_aggregation_enabled:
      params.HdfsDirectory(params.yarn_nm_app_log_dir,
                           action="create_delayed",
                           owner=params.yarn_user,
                           group=params.user_group,
                           mode=0777,
                           recursive_chmod=True
      )
    params.HdfsDirectory("/mapred",
                         action="create_delayed",
                         owner=params.mapred_user
    )
    params.HdfsDirectory("/mapred/system",
                         action="create_delayed",
                         owner=params.hdfs_user
    )
    params.HdfsDirectory(params.mapreduce_jobhistory_intermediate_done_dir,
                         action="create_delayed",
                         owner=params.mapred_user,
                         group=params.user_group,
                         mode=0777
    )

    params.HdfsDirectory(params.mapreduce_jobhistory_done_dir,
                         action="create_delayed",
                         owner=params.mapred_user,
                         group=params.user_group,
                         mode=01777
    )
    params.HdfsDirectory(None, action="create")

  if name == "nodemanager":
    Directory(params.nm_local_dirs.split(',') + params.nm_log_dirs.split(','),
              owner=params.yarn_user,
              recursive=True,
              ignore_failures=True,
              )

  Directory([params.yarn_pid_dir, params.yarn_log_dir],
            owner=params.yarn_user,
            group=params.user_group,
            recursive=True
  )

  Directory([params.mapred_pid_dir, params.mapred_log_dir],
            owner=params.mapred_user,
            group=params.user_group,
            recursive=True
  )
  Directory([params.yarn_log_dir_prefix],
            owner=params.yarn_user,
            recursive=True,
            ignore_failures=True,
  )

  XmlConfig("core-site.xml",
            conf_dir=params.config_dir,
            configurations=params.config['configurations']['core-site'],
            configuration_attributes=params.config['configuration_attributes']['core-site'],
            owner=params.hdfs_user,
            group=params.user_group,
            mode=0644
  )

  XmlConfig("mapred-site.xml",
            conf_dir=params.config_dir,
            configurations=params.config['configurations']['mapred-site'],
            configuration_attributes=params.config['configuration_attributes']['mapred-site'],
            owner=params.yarn_user,
            group=params.user_group,
            mode=0644
  )

  XmlConfig("yarn-site.xml",
            conf_dir=params.config_dir,
            configurations=params.config['configurations']['yarn-site'],
            configuration_attributes=params.config['configuration_attributes']['yarn-site'],
            owner=params.yarn_user,
            group=params.user_group,
            mode=0644
  )

  XmlConfig("capacity-scheduler.xml",
            conf_dir=params.config_dir,
            configurations=params.config['configurations']['capacity-scheduler'],
            configuration_attributes=params.config['configuration_attributes']['capacity-scheduler'],
            owner=params.yarn_user,
            group=params.user_group,
            mode=0644
  )

  if name == 'resourcemanager':
    File(params.yarn_job_summary_log,
       owner=params.yarn_user,
       group=params.user_group
    )
  elif name == 'apptimelineserver':
    Directory(params.ats_leveldb_dir,
       owner=params.yarn_user,
       group=params.user_group,
       recursive=True
    )

  File(params.rm_nodes_exclude_path,
       owner=params.yarn_user,
       group=params.user_group
  )

  File(format("{limits_conf_dir}/yarn.conf"),
       mode=0644,
       content=Template('yarn.conf.j2')
  )

  File(format("{limits_conf_dir}/mapreduce.conf"),
       mode=0644,
       content=Template('mapreduce.conf.j2')
  )

  File(format("{config_dir}/yarn-env.sh"),
       owner=params.yarn_user,
       group=params.user_group,
       mode=0755,
       content=InlineTemplate(params.yarn_env_sh_template)
  )

  if params.security_enabled:
    container_executor = format("{yarn_container_bin}/container-executor")
    File(container_executor,
         group=params.yarn_executor_container_group,
         mode=06050
    )

    File(format("{config_dir}/container-executor.cfg"),
         group=params.user_group,
         mode=0644,
         content=Template('container-executor.cfg.j2')
    )


  if params.security_enabled:
    tc_mode = 0644
    tc_owner = "root"
  else:
    tc_mode = None
    tc_owner = params.hdfs_user

  File(format("{config_dir}/mapred-env.sh"),
       owner=tc_owner,
       content=InlineTemplate(params.mapred_env_sh_template)
  )

  if params.security_enabled:
    File(os.path.join(params.hadoop_bin, "task-controller"),
         owner="root",
         group=params.mapred_tt_group,
         mode=06050
    )
    File(os.path.join(params.hadoop_conf_dir, 'taskcontroller.cfg'),
         owner = tc_owner,
         mode = tc_mode,
         group = params.mapred_tt_group,
         content=Template("taskcontroller.cfg.j2")
    )
  else:
    File(os.path.join(params.hadoop_conf_dir, 'taskcontroller.cfg'),
         owner=tc_owner,
         content=Template("taskcontroller.cfg.j2")
    )

  if "mapred-site" in params.config['configurations']:
    XmlConfig("mapred-site.xml",
              conf_dir=params.hadoop_conf_dir,
              configurations=params.config['configurations']['mapred-site'],
              configuration_attributes=params.config['configuration_attributes']['mapred-site'],
              owner=params.mapred_user,
              group=params.user_group
    )

  if "capacity-scheduler" in params.config['configurations']:
    XmlConfig("capacity-scheduler.xml",
              conf_dir=params.hadoop_conf_dir,
              configurations=params.config['configurations'][
                'capacity-scheduler'],
              configuration_attributes=params.config['configuration_attributes']['capacity-scheduler'],
              owner=params.hdfs_user,
              group=params.user_group
    )

  if os.path.exists(os.path.join(params.hadoop_conf_dir, 'fair-scheduler.xml')):
    File(os.path.join(params.hadoop_conf_dir, 'fair-scheduler.xml'),
         owner=params.mapred_user,
         group=params.user_group
    )

  if os.path.exists(
    os.path.join(params.hadoop_conf_dir, 'ssl-client.xml.example')):
    File(os.path.join(params.hadoop_conf_dir, 'ssl-client.xml.example'),
         owner=params.mapred_user,
         group=params.user_group
    )

  if os.path.exists(
    os.path.join(params.hadoop_conf_dir, 'ssl-server.xml.example')):
    File(os.path.join(params.hadoop_conf_dir, 'ssl-server.xml.example'),
         owner=params.mapred_user,
         group=params.user_group
    )
Example #18
def hbase(
        name=None  # 'master' or 'regionserver' or 'client'
):
    import params

    Directory(params.hbase_conf_dir,
              owner=params.hbase_user,
              group=params.user_group,
              recursive=True)

    Directory(params.tmp_dir, owner=params.hbase_user, recursive=True)

    Directory(os.path.join(params.local_dir, "jars"),
              owner=params.hbase_user,
              group=params.user_group,
              mode=0775,
              recursive=True)

    XmlConfig("hbase-site.xml",
              conf_dir=params.hbase_conf_dir,
              configurations=params.config['configurations']['hbase-site'],
              owner=params.hbase_user,
              group=params.user_group)

    XmlConfig("hdfs-site.xml",
              conf_dir=params.hbase_conf_dir,
              configurations=params.config['configurations']['hdfs-site'],
              owner=params.hbase_user,
              group=params.user_group)

    XmlConfig("hdfs-site.xml",
              conf_dir=params.hadoop_conf_dir,
              configurations=params.config['configurations']['hdfs-site'],
              owner=params.hdfs_user,
              group=params.user_group)

    if 'hbase-policy' in params.config['configurations']:
        XmlConfig(
            "hbase-policy.xml",
            conf_dir=params.hbase_conf_dir,
            configurations=params.config['configurations']['hbase-policy'],
            owner=params.hbase_user,
            group=params.user_group)
    # Manually overriding ownership of file installed by hadoop package
    else:
        File(format("{params.hbase_conf_dir}/hbase-policy.xml"),
             owner=params.hbase_user,
             group=params.user_group)

    hbase_TemplateConfig('hbase-env.sh')

    hbase_TemplateConfig(
        params.metric_prop_file_name,
        tag='GANGLIA-MASTER' if name == 'master' else 'GANGLIA-RS')

    hbase_TemplateConfig('regionservers')

    if params.security_enabled:
        hbase_TemplateConfig(format("hbase_{name}_jaas.conf"))

    if name != "client":
        Directory(params.pid_dir, owner=params.hbase_user, recursive=True)

        Directory(params.log_dir, owner=params.hbase_user, recursive=True)

    if (params.log4j_props != None):
        File(format("{params.hbase_conf_dir}/log4j.properties"),
             mode=0644,
             group=params.user_group,
             owner=params.hbase_user,
             content=params.log4j_props)
    elif (os.path.exists(format("{params.hbase_conf_dir}/log4j.properties"))):
        File(format("{params.hbase_conf_dir}/log4j.properties"),
             mode=0644,
             group=params.user_group,
             owner=params.hbase_user)
    if name in ["master", "regionserver"]:
        params.HdfsDirectory(params.hbase_hdfs_root_dir,
                             action="create_delayed",
                             owner=params.hbase_user)
        params.HdfsDirectory(params.hbase_staging_dir,
                             action="create_delayed",
                             owner=params.hbase_user,
                             mode=0711)
        params.HdfsDirectory(None, action="create")
Example #19
def mapreduce(name=None):
    import params

    if name in ["jobtracker", "historyserver"]:
        params.HdfsDirectory("/mapred",
                             action="create_delayed",
                             owner=params.mapred_user)
        params.HdfsDirectory("/mapred/system",
                             action="create_delayed",
                             owner=params.mapred_user)
        params.HdfsDirectory("/mapred/history",
                             action="create_delayed",
                             owner=params.mapred_user)
        params.HdfsDirectory(params.mapreduce_jobhistory_intermediate_done_dir,
                             action="create_delayed",
                             owner=params.mapred_user,
                             group=params.user_group,
                             mode=0777)
        params.HdfsDirectory(params.mapreduce_jobhistory_done_dir,
                             action="create_delayed",
                             owner=params.mapred_user,
                             group=params.user_group,
                             mode=0777)
        params.HdfsDirectory(None, action="create")

    Directory(params.mapred_pid_dir,
              owner=params.mapred_user,
              group=params.user_group,
              recursive=True)

    mapred_log_dir = os.path.join(params.mapred_log_dir_prefix,
                                  params.mapred_user)
    mapred_userlogs_dir = os.path.join(mapred_log_dir, "userlogs")

    Directory(mapred_log_dir,
              recursive=True,
              owner=params.mapred_user,
              group=params.user_group)
    Directory(mapred_userlogs_dir, recursive=True, mode=01777)
    if name == 'jobtracker':
        File(os.path.join(mapred_log_dir, 'hadoop-mapreduce.jobsummary.log'),
             owner=params.mapred_user,
             group=params.user_group,
             mode=0664)

    Directory(params.mapred_local_dir.split(','),
              owner=params.mapred_user,
              mode=0755,
              recursive=True,
              ignore_failures=True)

    File(
        params.exclude_file_path,
        owner=params.mapred_user,
        group=params.user_group,
    )

    File(
        params.mapred_hosts_file_path,
        owner=params.mapred_user,
        group=params.user_group,
    )

    if params.security_enabled:
        tc_mode = 0644
        tc_owner = "root"
    else:
        tc_mode = None
        tc_owner = params.hdfs_user

    if params.security_enabled:
        File(os.path.join(params.hadoop_bin, "task-controller"),
             owner="root",
             group=params.mapred_tt_group,
             mode=06050)
        File(os.path.join(params.hadoop_conf_dir, 'taskcontroller.cfg'),
             owner=tc_owner,
             mode=tc_mode,
             group=params.mapred_tt_group,
             content=Template("taskcontroller.cfg.j2"))
    else:
        File(os.path.join(params.hadoop_conf_dir, 'taskcontroller.cfg'),
             owner=tc_owner,
             content=Template("taskcontroller.cfg.j2"))

    if "capacity-scheduler" in params.config['configurations']:
        XmlConfig("capacity-scheduler.xml",
                  conf_dir=params.hadoop_conf_dir,
                  configurations=params.config['configurations']
                  ['capacity-scheduler'],
                  owner=params.hdfs_user,
                  group=params.user_group)

    if "mapred-queue-acls" in params.config['configurations']:
        XmlConfig("mapred-queue-acls.xml",
                  conf_dir=params.hadoop_conf_dir,
                  configurations=params.config['configurations']
                  ['mapred-queue-acls'],
                  owner=params.mapred_user,
                  group=params.user_group)
    elif os.path.exists(
            os.path.join(params.hadoop_conf_dir, "mapred-queue-acls.xml")):
        File(os.path.join(params.hadoop_conf_dir, "mapred-queue-acls.xml"),
             owner=params.mapred_user,
             group=params.user_group)

    if "mapred-site" in params.config['configurations']:
        XmlConfig(
            "mapred-site.xml",
            conf_dir=params.hadoop_conf_dir,
            configurations=params.config['configurations']['mapred-site'],
            owner=params.mapred_user,
            group=params.user_group)

    if os.path.exists(
            os.path.join(params.hadoop_conf_dir, 'fair-scheduler.xml')):
        File(os.path.join(params.hadoop_conf_dir, 'fair-scheduler.xml'),
             owner=params.mapred_user,
             group=params.user_group)

    if os.path.exists(
            os.path.join(params.hadoop_conf_dir, 'ssl-client.xml.example')):
        File(os.path.join(params.hadoop_conf_dir, 'ssl-client.xml.example'),
             owner=params.mapred_user,
             group=params.user_group)

    if os.path.exists(
            os.path.join(params.hadoop_conf_dir, 'ssl-server.xml.example')):
        File(os.path.join(params.hadoop_conf_dir, 'ssl-server.xml.example'),
             owner=params.mapred_user,
             group=params.user_group)
Example #20
def yarn(name=None):
    import params

    if name in ["nodemanager", "historyserver"]:
        if params.yarn_log_aggregation_enabled:
            params.HdfsDirectory(params.yarn_nm_app_log_dir,
                                 action="create_delayed",
                                 owner=params.yarn_user,
                                 group=params.user_group,
                                 mode=0777,
                                 recursive_chmod=True)
        params.HdfsDirectory("/mapred",
                             action="create_delayed",
                             owner=params.mapred_user)
        params.HdfsDirectory("/mapred/system",
                             action="create_delayed",
                             owner=params.hdfs_user)
        params.HdfsDirectory(params.mapreduce_jobhistory_intermediate_done_dir,
                             action="create_delayed",
                             owner=params.mapred_user,
                             group=params.user_group,
                             mode=0777)

        params.HdfsDirectory(params.mapreduce_jobhistory_done_dir,
                             action="create_delayed",
                             owner=params.mapred_user,
                             group=params.user_group,
                             mode=01777)
        params.HdfsDirectory(None, action="create")

    if name == "nodemanager":
        Directory(params.nm_local_dirs.split(','),
                  owner=params.yarn_user,
                  create_parents=True)
        Directory(params.nm_log_dirs.split(','),
                  owner=params.yarn_user,
                  create_parents=True)

    Directory([params.yarn_pid_dir, params.yarn_log_dir],
              owner=params.yarn_user,
              group=params.user_group,
              create_parents=True)

    Directory([params.mapred_pid_dir, params.mapred_log_dir],
              owner=params.mapred_user,
              group=params.user_group,
              create_parents=True)

    Directory(params.yarn_log_dir_prefix,
              owner=params.yarn_user,
              create_parents=True)

    XmlConfig("core-site.xml",
              conf_dir=params.config_dir,
              configurations=params.config['configurations']['core-site'],
              configuration_attributes=params.config['configurationAttributes']
              ['core-site'],
              owner=params.hdfs_user,
              group=params.user_group,
              mode=0644)

    XmlConfig("mapred-site.xml",
              conf_dir=params.config_dir,
              configurations=params.config['configurations']['mapred-site'],
              configuration_attributes=params.config['configurationAttributes']
              ['mapred-site'],
              owner=params.yarn_user,
              group=params.user_group,
              mode=0644)

    XmlConfig("yarn-site.xml",
              conf_dir=params.config_dir,
              configurations=params.config['configurations']['yarn-site'],
              configuration_attributes=params.config['configurationAttributes']
              ['yarn-site'],
              owner=params.yarn_user,
              group=params.user_group,
              mode=0644)

    XmlConfig(
        "capacity-scheduler.xml",
        conf_dir=params.config_dir,
        configurations=params.config['configurations']['capacity-scheduler'],
        configuration_attributes=params.config['configurationAttributes']
        ['capacity-scheduler'],
        owner=params.yarn_user,
        group=params.user_group,
        mode=0644)

    if name == 'resourcemanager':
        File(params.yarn_job_summary_log,
             owner=params.yarn_user,
             group=params.user_group)

    File(params.rm_nodes_exclude_path,
         owner=params.yarn_user,
         group=params.user_group)

    File(format("{limits_conf_dir}/yarn.conf"),
         mode=0644,
         content=Template('yarn.conf.j2'))

    File(format("{limits_conf_dir}/mapreduce.conf"),
         mode=0644,
         content=Template('mapreduce.conf.j2'))

    File(format("{config_dir}/yarn-env.sh"),
         owner=params.yarn_user,
         group=params.user_group,
         mode=0755,
         content=Template('yarn-env.sh.j2'))

    if params.security_enabled:
        container_executor = format("{yarn_container_bin}/container-executor")
        File(container_executor,
             group=params.yarn_executor_container_group,
             mode=06050)

        File(format("{config_dir}/container-executor.cfg"),
             group=params.user_group,
             mode=0644,
             content=Template('container-executor.cfg.j2'))
Example #21
def hbase(
        name=None  # 'master' or 'regionserver' or 'client'
):
    import params

    Directory(params.hbase_conf_dir,
              owner=params.hbase_user,
              group=params.user_group,
              recursive=True)

    Directory(params.hbase_tmp_dir,
              owner=params.hbase_user,
              cd_access="a",
              recursive=True)

    Directory(os.path.join(params.local_dir, "jars"),
              owner=params.hbase_user,
              group=params.user_group,
              cd_access="a",
              mode=0775,
              recursive=True)

    merged_ams_hbase_site = {}
    merged_ams_hbase_site.update(
        params.config['configurations']['ams-hbase-site'])
    if params.security_enabled:
        merged_ams_hbase_site.update(
            params.config['configurations']['ams-hbase-security-site'])

    XmlConfig(
        "hbase-site.xml",
        conf_dir=params.hbase_conf_dir,
        configurations=merged_ams_hbase_site,
        configuration_attributes=params.config['configuration_attributes']
        ['ams-hbase-site'],
        owner=params.hbase_user,
        group=params.user_group)

    # Phoenix spool file dir if not /tmp
    if not os.path.exists(params.phoenix_server_spool_dir):
        Directory(params.phoenix_server_spool_dir,
                  owner=params.ams_user,
                  mode=0755,
                  group=params.user_group,
                  cd_access="a",
                  recursive=True)
    pass

    if 'ams-hbase-policy' in params.config['configurations']:
        XmlConfig(
            "hbase-policy.xml",
            conf_dir=params.hbase_conf_dir,
            configurations=params.config['configurations']['ams-hbase-policy'],
            configuration_attributes=params.config['configuration_attributes']
            ['ams-hbase-policy'],
            owner=params.hbase_user,
            group=params.user_group)
    # Manually overriding ownership of file installed by hadoop package
    else:
        File(format("{params.hbase_conf_dir}/hbase-policy.xml"),
             owner=params.hbase_user,
             group=params.user_group)

    File(format("{hbase_conf_dir}/hbase-env.sh"),
         owner=params.hbase_user,
         content=InlineTemplate(params.hbase_env_sh_template))

    # Metrics properties
    File(os.path.join(params.hbase_conf_dir,
                      "hadoop-metrics2-hbase.properties"),
         owner=params.hbase_user,
         group=params.user_group,
         content=Template("hadoop-metrics2-hbase.properties.j2"))

    # hbase_TemplateConfig( params.metric_prop_file_name,
    #   tag = 'GANGLIA-MASTER' if name == 'master' else 'GANGLIA-RS'
    # )

    hbase_TemplateConfig('regionservers', user=params.hbase_user)

    if params.security_enabled:
        hbase_TemplateConfig(format("hbase_{name}_jaas.conf"),
                             user=params.hbase_user)
        hbase_TemplateConfig(format("hbase_client_jaas.conf"),
                             user=params.hbase_user)
        hbase_TemplateConfig(format("ams_zookeeper_jaas.conf"),
                             user=params.hbase_user)

    if name in ["master", "regionserver"]:

        if params.is_hbase_distributed:

            params.HdfsDirectory(params.hbase_root_dir,
                                 action="create_delayed",
                                 owner=params.hbase_user,
                                 mode=0775)

            params.HdfsDirectory(params.hbase_staging_dir,
                                 action="create_delayed",
                                 owner=params.hbase_user,
                                 mode=0711)

            params.HdfsDirectory(None, action="create")

        else:

            local_root_dir = params.hbase_root_dir
            #cut protocol name
            if local_root_dir.startswith("file://"):
                local_root_dir = local_root_dir[7:]
                #otherwise assume dir name is provided as is

            Directory(local_root_dir,
                      owner=params.hbase_user,
                      cd_access="a",
                      recursive=True)

    if name != "client":
        Directory(params.hbase_pid_dir,
                  owner=params.hbase_user,
                  recursive=True)

        Directory(params.hbase_log_dir,
                  owner=params.hbase_user,
                  recursive=True)

    if params.hbase_log4j_props is not None:
        File(format("{params.hbase_conf_dir}/log4j.properties"),
             mode=0644,
             group=params.user_group,
             owner=params.hbase_user,
             content=params.hbase_log4j_props)
    elif os.path.exists(format("{params.hbase_conf_dir}/log4j.properties")):
        File(format("{params.hbase_conf_dir}/log4j.properties"),
             mode=0644,
             group=params.user_group,
             owner=params.hbase_user)
Example #22
def accumulo(
        name=None  # 'master' or 'tserver' or 'client'
):
    import params

    Directory(params.accumulo_conf_dir,
              owner=params.accumulo_user,
              recursive=True)

    XmlConfig(
        "accumulo-site.xml",
        conf_dir=params.accumulo_conf_dir,
        configurations=params.config['configurations']['accumulo-site'],
        configuration_attributes=params.config['configuration_attributes']
        ['accumulo-site'],
        owner=params.accumulo_user,
        mode=0600)

    XmlConfig(
        "hdfs-site.xml",
        conf_dir=params.hadoop_conf_dir,
        configurations=params.config['configurations']['hdfs-site'],
        configuration_attributes=params.config['configuration_attributes']
        ['hdfs-site'],
        owner=params.hdfs_user,
    )

    if 'accumulo-policy' in params.config['configurations']:
        XmlConfig(
            "accumulo-policy.xml",
            conf_dir=params.accumulo_conf_dir,
            configurations=params.config['configurations']['accumulo-policy'],
            configuration_attributes=params.config['configuration_attributes']
            ['accumulo-policy'],
            owner=params.accumulo_user,
        )
    else:
        File(
            format("{params.accumulo_conf_dir}/accumulo-policy.xml"),
            owner=params.accumulo_user,
        )

    Directory(params.log_dir, owner=params.accumulo_user, recursive=True)

    if (params.log4j_props != None):
        File(format("{params.accumulo_conf_dir}/log4j.properties"),
             mode=0644,
             owner=params.accumulo_user,
             content=params.log4j_props)
    elif (os.path.exists(
            format("{params.accumulo_conf_dir}/log4j.properties"))):
        File(format("{params.accumulo_conf_dir}/log4j.properties"),
             mode=0644,
             owner=params.accumulo_user)

    if name in ["master", "tserver"]:
        params.HdfsDirectory(
            format("{params.accumulo_hdfs_root_dir}"),
            action="create_delayed",
            owner=params.accumulo_user,
        )
        params.HdfsDirectory(format("{params.accumulo_hdfs_stage_dir}"),
                             action="create_delayed",
                             owner=params.accumulo_user,
                             mode=0751)
        params.HdfsDirectory(None, action="create")

    accumulo_StaticFile("auditLog.xml")
    accumulo_StaticFile("generic_logger.xml")
    accumulo_StaticFile("monitor_logger.xml")
    accumulo_StaticFile("accumulo-metrics.xml")

    accumulo_StaticFile("tracers")
    accumulo_StaticFile("gc")
    accumulo_StaticFile("monitor")
    accumulo_StaticFile('slaves')
    accumulo_StaticFile('masters')

    accumulo_TemplateConfig('accumulo-env.sh')
Example #23
def hive(name=None):
    import params

    if name == "hiveserver2":
        params.HdfsDirectory(params.hive_apps_whs_dir,
                             action="create_delayed",
                             owner=params.hive_user,
                             mode=0777)
        params.HdfsDirectory(params.hive_hdfs_user_dir,
                             action="create_delayed",
                             owner=params.hive_user,
                             mode=params.hive_hdfs_user_mode)
        params.HdfsDirectory(None, action="create")
    if name == 'metastore' or name == 'hiveserver2':
        hive_config_dir = params.hive_server_conf_dir
        config_file_mode = 0600
        jdbc_connector()
    else:
        hive_config_dir = params.hive_conf_dir
        config_file_mode = 0644

    Directory(hive_config_dir,
              owner=params.hive_user,
              group=params.user_group,
              recursive=True)

    XmlConfig("hive-site.xml",
              conf_dir=hive_config_dir,
              configurations=params.config['configurations']['hive-site'],
              owner=params.hive_user,
              group=params.user_group,
              mode=config_file_mode)

    cmd = format(
        "/bin/sh -c 'cd /usr/lib/ambari-agent/ && curl -kf --retry 5 "
        "{jdk_location}{check_db_connection_jar_name} -o {check_db_connection_jar_name}'"
    )

    Execute(cmd, not_if=format("[ -f {check_db_connection_jar_name} ]"))

    if name == 'metastore':
        File(params.start_metastore_path,
             mode=0755,
             content=StaticFile('startMetastore.sh'))

    elif name == 'hiveserver2':
        File(params.start_hiveserver2_path,
             mode=0755,
             content=StaticFile('startHiveserver2.sh'))

    if name != "client":
        crt_directory(params.hive_pid_dir)
        crt_directory(params.hive_log_dir)
        crt_directory(params.hive_var_lib)

    File(format("{hive_config_dir}/hive-env.sh"),
         owner=params.hive_user,
         group=params.user_group,
         content=Template('hive-env.sh.j2', conf_dir=hive_config_dir))

    crt_file(format("{hive_conf_dir}/hive-default.xml.template"))
    crt_file(format("{hive_conf_dir}/hive-env.sh.template"))

    log4j_exec_filename = 'hive-exec-log4j.properties'
    if params.log4j_exec_props is not None:
        File(format("{params.hive_conf_dir}/{log4j_exec_filename}"),
             mode=0644,
             group=params.user_group,
             owner=params.hive_user,
             content=params.log4j_exec_props)
    elif os.path.exists(
            format("{params.hive_conf_dir}/{log4j_exec_filename}.template")):
        File(format("{params.hive_conf_dir}/{log4j_exec_filename}"),
             mode=0644,
             group=params.user_group,
             owner=params.hive_user,
             content=StaticFile(
                 format(
                     "{params.hive_conf_dir}/{log4j_exec_filename}.template")))

    log4j_filename = 'hive-log4j.properties'
    if params.log4j_props is not None:
        File(format("{params.hive_conf_dir}/{log4j_filename}"),
             mode=0644,
             group=params.user_group,
             owner=params.hive_user,
             content=params.log4j_props)
    elif (os.path.exists("{params.hive_conf_dir}/{log4j_filename}.template")):
        File(format("{params.hive_conf_dir}/{log4j_filename}"),
             mode=0644,
             group=params.user_group,
             owner=params.hive_user,
             content=StaticFile(
                 format("{params.hive_conf_dir}/{log4j_filename}.template")))
Example #24
0
def scdf(name=None):
    import params

    if name == "server":
        params.HdfsDirectory(params.deployer_dir,
                             action=params.action_create_delayed,
                             owner=params.scdf_user,
                             mode=0777)
        params.HdfsDirectory(params.scdf_hdfs_user_dir,
                             action=params.action_create_delayed,
                             owner=params.scdf_user,
                             mode=0777)
        params.HdfsDirectory(None, action=params.action_create)

    try:
        Directory(params.log_dir,
                  owner=params.scdf_user,
                  group=params.user_group,
                  mode=0775,
                  recursive=True)
    except Fail:
        Directory(params.log_dir,
                  owner=params.scdf_user,
                  group=params.user_group,
                  mode=0775,
                  create_parents=True)

    try:
        Directory([params.pid_dir, params.data_dir, params.conf_dir],
                  owner=params.scdf_user,
                  group=params.user_group,
                  recursive=True)
    except Fail:
        Directory([params.pid_dir, params.data_dir, params.conf_dir],
                  owner=params.scdf_user,
                  group=params.user_group,
                  create_parents=True)

    dfs_ha_map = {}
    if params.dfs_ha_enabled:
        for nn_id in params.dfs_ha_namemodes_ids_list:
            nn_host = params.config['configurations']['hdfs-site'][format(
                'dfs.namenode.rpc-address.{dfs_ha_nameservices}.{nn_id}')]
            dfs_ha_map[nn_id] = nn_host

    configurations = params.config['configurations']['scdf-site']
    sec_filtered_map = {}
    for key, value in configurations.iteritems():
        if "security" in value:
            sec_filtered_map[key] = value

    File(format("{conf_dir}/servers.yml"),
         content=Template("servers.yml.j2",
                          extra_imports=[escape_yaml_property],
                          dfs_ha_map=dfs_ha_map,
                          configurations=configurations),
         owner=params.scdf_user,
         group=params.user_group)

    File(format("{conf_dir}/scdf_kafka_jaas.conf"),
         content=Template("scdf_kafka_jaas.conf.j2",
                          configurations=configurations),
         owner=params.scdf_user,
         group=params.user_group)

    File(format("{conf_dir}/scdf-shell.init"),
         content=Template("scdf-shell.init.j2", dfs_ha_map=dfs_ha_map),
         owner=params.scdf_user,
         group=params.user_group)

    File(format("{conf_dir}/hadoop.properties"),
         content=Template("hadoop.properties.j2",
                          dfs_ha_map=dfs_ha_map,
                          sec_filtered_map=sec_filtered_map),
         owner=params.scdf_user,
         group=params.user_group)

    File(format("{conf_dir}/scdf-server-env.sh"),
         owner=params.scdf_user,
         content=InlineTemplate(params.scdf_server_env_sh_template))

    File(format("{conf_dir}/scdf-shell-env.sh"),
         owner=params.scdf_user,
         content=InlineTemplate(params.scdf_shell_env_sh_template))
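
Note: the dfs_ha_map loop in the example above resolves each HA NameNode id to its RPC address from hdfs-site. A standalone sketch of the same lookup, using made-up sample values:

# Standalone sketch of the dfs_ha_map lookup above, with made-up sample data.
hdfs_site = {
    'dfs.namenode.rpc-address.mycluster.nn1': 'nn-host-1:8020',
    'dfs.namenode.rpc-address.mycluster.nn2': 'nn-host-2:8020',
}
dfs_ha_nameservices = 'mycluster'
dfs_ha_namemodes_ids_list = ['nn1', 'nn2']

dfs_ha_map = {}
for nn_id in dfs_ha_namemodes_ids_list:
    key = 'dfs.namenode.rpc-address.%s.%s' % (dfs_ha_nameservices, nn_id)
    dfs_ha_map[nn_id] = hdfs_site[key]
# dfs_ha_map == {'nn1': 'nn-host-1:8020', 'nn2': 'nn-host-2:8020'}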
Example #25
0
def _copy_files(source_and_dest_pairs, component_user, file_owner, group_owner,
                kinit_if_needed):
    """
  :param source_and_dest_pairs: List of tuples (x, y), where x is the source file in the local file system,
  and y is the destination file path in HDFS
  :param component_user:  User that will execute the Hadoop commands, usually smokeuser
  :param file_owner: Owner to set for the file copied to HDFS (typically hdfs account)
  :param group_owner: Owning group to set for the file copied to HDFS (typically hadoop group)
  :param kinit_if_needed: kinit command if it is needed, otherwise an empty string
  :return: Returns 0 if at least one file was copied and no exceptions occurred, and 1 otherwise.

  Must kinit before calling this function.
  """
    import params

    return_value = 1
    if source_and_dest_pairs:
        return_value = 0
        for (source, destination) in source_and_dest_pairs:
            try:
                destination_dir = os.path.dirname(destination)

                params.HdfsDirectory(
                    destination_dir,
                    action="create",
                    owner=file_owner,
                    hdfs_user=params.hdfs_user,  # the user to run the commands as
                    mode=0555)

                # Because CopyFromLocal does not guarantee synchronization, it's possible for two processes to first attempt to
                # copy the file to a temporary location, then process 2 fails because the temporary file was already created by
                # process 1, so process 2 tries to clean up by deleting the temporary file, and then process 1
                # cannot finish the copy to the final destination, and both fail!
                # For this reason, the file name on the destination must be unique, and we then rename it to the intended value.
                # The rename operation is synchronized by the Namenode.
                orig_dest_file_name = os.path.split(destination)[1]
                unique_string = str(uuid.uuid4())[:8]
                new_dest_file_name = orig_dest_file_name + "." + unique_string
                new_destination = os.path.join(destination_dir,
                                               new_dest_file_name)
                CopyFromLocal(
                    source,
                    mode=0444,
                    owner=file_owner,
                    group=group_owner,
                    user=params.hdfs_user,  # the user to run the commands as
                    dest_dir=destination_dir,
                    dest_file=new_dest_file_name,
                    kinnit_if_needed=kinit_if_needed,
                    hdfs_user=params.hdfs_user,
                    hadoop_bin_dir=params.hadoop_bin_dir,
                    hadoop_conf_dir=params.hadoop_conf_dir)

                mv_command = format("fs -mv {new_destination} {destination}")
                ExecuteHadoop(mv_command,
                              user=params.hdfs_user,
                              bin_dir=params.hadoop_bin_dir,
                              conf_dir=params.hadoop_conf_dir)
            except Exception, e:
                Logger.error(
                    "Failed to copy file. Source: %s, Destination: %s. Error: %s"
                    % (source, destination, e.message))
                return_value = 1
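
Note: the comment inside _copy_files explains why each upload must land under a unique temporary name before a NameNode-synchronized rename moves it into place. A self-contained sketch of that copy-then-rename idiom, using illustrative shell commands rather than the real CopyFromLocal/ExecuteHadoop resources:

# Sketch of the copy-to-unique-name-then-rename idiom described above;
# the returned shell commands are illustrative, not the real resources.
import os
import uuid

def atomic_hdfs_copy_commands(source, destination):
    destination_dir = os.path.dirname(destination)
    unique_suffix = str(uuid.uuid4())[:8]
    temp_name = os.path.split(destination)[1] + "." + unique_suffix
    temp_destination = os.path.join(destination_dir, temp_name)
    return [
        # Each process copies to its own unique temporary name...
        "hadoop fs -copyFromLocal %s %s" % (source, temp_destination),
        # ...then relies on the NameNode-synchronized rename for the final move.
        "hadoop fs -mv %s %s" % (temp_destination, destination),
    ]

print(atomic_hdfs_copy_commands("/tmp/app.jar", "/apps/lib/app.jar"))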
Example #26
0
def setup_conf_dir(name=None):
    # name: 'master', 'tserver', 'monitor', 'gc', 'tracer' or 'client'
    import params

    # create the conf directory
    Directory(params.conf_dir,
              mode=0755,
              owner=params.accumulo_user,
              group=params.user_group,
              recursive=True)

    if name == 'client':
        dest_conf_dir = params.conf_dir

        # create a site file for client processes
        configs = {}
        configs.update(params.config['configurations']['accumulo-site'])
        if "instance.secret" in configs:
            configs.pop("instance.secret")
        if "trace.token.property.password" in configs:
            configs.pop("trace.token.property.password")
        XmlConfig(
            "accumulo-site.xml",
            conf_dir=dest_conf_dir,
            configurations=configs,
            configuration_attributes=params.config['configuration_attributes']
            ['accumulo-site'],
            owner=params.accumulo_user,
            group=params.user_group,
            mode=0644)

        # create env file
        File(format("{dest_conf_dir}/accumulo-env.sh"),
             mode=0644,
             group=params.user_group,
             owner=params.accumulo_user,
             content=InlineTemplate(params.env_sh_template))
    else:
        dest_conf_dir = params.server_conf_dir
        # create server conf directory
        Directory(params.server_conf_dir,
                  mode=0700,
                  owner=params.accumulo_user,
                  group=params.user_group,
                  recursive=True)
        # create a site file for server processes
        configs = {}
        configs.update(params.config['configurations']['accumulo-site'])
        configs["instance.secret"] = str(
            params.config['configurations']['accumulo-env']['instance_secret'])
        configs["trace.token.property.password"] = str(params.trace_password)
        XmlConfig(
            "accumulo-site.xml",
            conf_dir=dest_conf_dir,
            configurations=configs,
            configuration_attributes=params.config['configuration_attributes']
            ['accumulo-site'],
            owner=params.accumulo_user,
            group=params.user_group,
            mode=0600)

        # create pid dir
        Directory(params.pid_dir,
                  owner=params.accumulo_user,
                  group=params.user_group,
                  recursive=True)

        # create log dir
        Directory(params.log_dir,
                  owner=params.accumulo_user,
                  group=params.user_group,
                  recursive=True)

        # create env file
        File(format("{dest_conf_dir}/accumulo-env.sh"),
             mode=0644,
             group=params.user_group,
             owner=params.accumulo_user,
             content=InlineTemplate(params.server_env_sh_template))

    # create client.conf file
    configs = {}
    configs["instance.name"] = params.instance_name
    configs["instance.zookeeper.host"] = params.config['configurations'][
        'accumulo-site']['instance.zookeeper.host']
    if 'instance.rpc.sasl.enabled' in params.config['configurations'][
            'accumulo-site']:
        configs["instance.rpc.sasl.enabled"] = params.config['configurations'][
            'accumulo-site']['instance.rpc.sasl.enabled']
    PropertiesFile(format("{dest_conf_dir}/client.conf"),
                   properties=configs,
                   owner=params.accumulo_user,
                   group=params.user_group)

    # create log4j.properties files
    if params.log4j_props is not None:
        File(format("{params.conf_dir}/log4j.properties"),
             mode=0644,
             group=params.user_group,
             owner=params.accumulo_user,
             content=params.log4j_props)
    else:
        File(format("{params.conf_dir}/log4j.properties"),
             mode=0644,
             group=params.user_group,
             owner=params.accumulo_user)

    # create logging configuration files
    accumulo_TemplateConfig("auditLog.xml", dest_conf_dir)
    accumulo_TemplateConfig("generic_logger.xml", dest_conf_dir)
    accumulo_TemplateConfig("monitor_logger.xml", dest_conf_dir)
    accumulo_StaticFile("accumulo-metrics.xml", dest_conf_dir)

    # create host files
    accumulo_StaticFile("tracers", dest_conf_dir)
    accumulo_StaticFile("gc", dest_conf_dir)
    accumulo_StaticFile("monitor", dest_conf_dir)
    accumulo_StaticFile("slaves", dest_conf_dir)
    accumulo_StaticFile("masters", dest_conf_dir)

    # metrics configuration
    if params.has_metric_collector:
        accumulo_TemplateConfig("hadoop-metrics2-accumulo.properties",
                                dest_conf_dir)

    # other server setup
    if name == 'master':
        params.HdfsDirectory(format("/user/{params.accumulo_user}"),
                             action="create_delayed",
                             owner=params.accumulo_user,
                             mode=0700)
        params.HdfsDirectory(format("{params.parent_dir}"),
                             action="create_delayed",
                             owner=params.accumulo_user,
                             mode=0700)
        params.HdfsDirectory(None, action="create")
        if params.security_enabled and params.has_secure_user_auth:
            Execute(format("{params.kinit_cmd} "
                           "{params.daemon_script} init "
                           "--user {params.accumulo_principal_name} "
                           "--instance-name {params.instance_name} "
                           "--clear-instance-name "
                           ">{params.log_dir}/accumulo-init.out "
                           "2>{params.log_dir}/accumulo-init.err"),
                    not_if=as_user(
                        format("{params.kinit_cmd} "
                               "{params.hadoop_bin_dir}/hadoop --config "
                               "{params.hadoop_conf_dir} fs -stat "
                               "{params.instance_volumes}"),
                        params.accumulo_user),
                    user=params.accumulo_user)
        else:
            passfile = format("{params.exec_tmp_dir}/pass")
            try:
                File(passfile,
                     mode=0600,
                     group=params.user_group,
                     owner=params.accumulo_user,
                     content=InlineTemplate('{{root_password}}\n'
                                            '{{root_password}}\n'))
                Execute(format("cat {passfile} | {params.daemon_script} init "
                               "--instance-name {params.instance_name} "
                               "--clear-instance-name "
                               ">{params.log_dir}/accumulo-init.out "
                               "2>{params.log_dir}/accumulo-init.err"),
                        not_if=as_user(
                            format("{params.kinit_cmd} "
                                   "{params.hadoop_bin_dir}/hadoop --config "
                                   "{params.hadoop_conf_dir} fs -stat "
                                   "{params.instance_volumes}"),
                            params.accumulo_user),
                        user=params.accumulo_user)
            finally:
                os.remove(passfile)

    if name == 'tracer':
        if params.security_enabled and params.has_secure_user_auth:
            Execute(format("{params.kinit_cmd} "
                           "{params.daemon_script} init --reset-security "
                           "--user {params.accumulo_principal_name} "
                           "--password NA "
                           ">{params.log_dir}/accumulo-reset.out "
                           "2>{params.log_dir}/accumulo-reset.err"),
                    not_if=as_user(
                        format("{params.kinit_cmd} "
                               "{params.daemon_script} shell -e "
                               "\"userpermissions -u "
                               "{params.accumulo_principal_name}\" | "
                               "grep System.CREATE_TABLE"),
                        params.accumulo_user),
                    user=params.accumulo_user)
            create_user(params.smokeuser_principal, params.smoke_test_password)
        else:
            # do not try to reset security in nonsecure mode, for now
            # Execute( format("{params.daemon_script} init --reset-security "
            #                 "--user root "
            #                 ">{params.log_dir}/accumulo-reset.out "
            #                 "2>{params.log_dir}/accumulo-reset.err"),
            #          not_if=as_user(format("cat {rpassfile} | "
            #                                "{params.daemon_script} shell -e "
            #                                "\"userpermissions -u root\" | "
            #                                "grep System.CREATE_TABLE"),
            #                         params.accumulo_user),
            #          user=params.accumulo_user)
            create_user(params.smoke_test_user, params.smoke_test_password)
        create_user(params.trace_user, params.trace_password)
        rpassfile = format("{params.exec_tmp_dir}/pass0")
        cmdfile = format("{params.exec_tmp_dir}/resetcmds")
        try:
            File(cmdfile,
                 mode=0600,
                 group=params.user_group,
                 owner=params.accumulo_user,
                 content=InlineTemplate(
                     'grant -t trace -u {{trace_user}} Table.ALTER_TABLE\n'
                     'grant -t trace -u {{trace_user}} Table.READ\n'
                     'grant -t trace -u {{trace_user}} Table.WRITE\n'))
            if params.security_enabled and params.has_secure_user_auth:
                Execute(format(
                    "{params.kinit_cmd} {params.daemon_script} shell -f "
                    "{cmdfile}"),
                        only_if=as_user(
                            format("{params.kinit_cmd} "
                                   "{params.daemon_script} shell "
                                   "-e \"table trace\""),
                            params.accumulo_user),
                        not_if=as_user(
                            format("{params.kinit_cmd} "
                                   "{params.daemon_script} shell "
                                   "-e \"userpermissions -u "
                                   "{params.trace_user} | "
                                   "grep Table.READ | grep trace"),
                            params.accumulo_user),
                        user=params.accumulo_user)
            else:
                File(rpassfile,
                     mode=0600,
                     group=params.user_group,
                     owner=params.accumulo_user,
                     content=InlineTemplate('{{root_password}}\n'))
                Execute(
                    format("cat {rpassfile} | {params.daemon_script} shell -f "
                           "{cmdfile} -u root"),
                    only_if=as_user(
                        format("cat {rpassfile} | "
                               "{params.daemon_script} shell -u root "
                               "-e \"table trace\""), params.accumulo_user),
                    not_if=as_user(
                        format("cat {rpassfile} | "
                               "{params.daemon_script} shell -u root "
                               "-e \"userpermissions -u "
                               "{params.trace_user} | "
                               "grep Table.READ | grep trace"),
                        params.accumulo_user),
                    user=params.accumulo_user)
        finally:
            try_remove(rpassfile)
            try_remove(cmdfile)
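
Note: the Execute calls in the example above stay idempotent through their not_if/only_if guards: only_if must succeed for the command to run at all, and not_if short-circuits work that is already done. A rough standalone approximation of those semantics (the real resource_management implementation differs):

# Rough approximation of the Execute(not_if=..., only_if=...) guard semantics
# used above; a sketch, not the actual resource_management implementation.
import subprocess

def guarded_execute(command, not_if=None, only_if=None):
    if only_if is not None and subprocess.call(only_if, shell=True) != 0:
        return  # precondition failed: skip
    if not_if is not None and subprocess.call(not_if, shell=True) == 0:
        return  # already done: skip, keeping the action idempotent
    subprocess.check_call(command, shell=True)

guarded_execute("echo initializing",
                not_if="test -f /tmp/.initialized",
                only_if="true")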
Example #27
0
def falcon(type, action=None):
    import params
    if action == 'config':
        Directory(params.falcon_pid_dir,
                  owner=params.falcon_user,
                  recursive=True)
        Directory(params.falcon_log_dir,
                  owner=params.falcon_user,
                  recursive=True)
        Directory(params.falcon_webapp_dir,
                  owner=params.falcon_user,
                  recursive=True)
        Directory(params.falcon_home, owner=params.falcon_user, recursive=True)
        Directory(params.etc_prefix_dir, mode=0755, recursive=True)
        Directory(params.falcon_conf_dir,
                  owner=params.falcon_user,
                  recursive=True)
        File(params.falcon_conf_dir + '/falcon-env.sh',
             content=InlineTemplate(params.falcon_env_sh_template),
             owner=params.falcon_user)
        File(params.falcon_conf_dir + '/client.properties',
             content=Template('client.properties.j2'),
             mode=0644,
             owner=params.falcon_user)
        PropertiesFile(params.falcon_conf_dir + '/runtime.properties',
                       properties=params.falcon_runtime_properties,
                       mode=0644,
                       owner=params.falcon_user)
        PropertiesFile(params.falcon_conf_dir + '/startup.properties',
                       properties=params.falcon_startup_properties,
                       mode=0644,
                       owner=params.falcon_user)

        if params.falcon_graph_storage_directory:
            Directory(params.falcon_graph_storage_directory,
                      owner=params.falcon_user,
                      group=params.user_group,
                      mode=0775,
                      recursive=True,
                      cd_access="a")

        if params.falcon_graph_serialize_path:
            Directory(params.falcon_graph_serialize_path,
                      owner=params.falcon_user,
                      group=params.user_group,
                      mode=0775,
                      recursive=True,
                      cd_access="a")

    if type == 'server':
        if action == 'config':
            if params.store_uri[0:4] == "hdfs":
                params.HdfsDirectory(params.store_uri,
                                     action="create_delayed",
                                     owner=params.falcon_user,
                                     mode=0755)
            if params.store_uri[0:4] == "file":
                Directory(params.store_uri[7:],
                          owner=params.falcon_user,
                          recursive=True)
            params.HdfsDirectory(
                params.flacon_apps_dir,
                action="create_delayed",
                owner=params.falcon_user,
                mode=0777  #TODO change to proper mode
            )
            if params.falcon_store_uri[0:4] == "hdfs":
                params.HdfsDirectory(params.falcon_store_uri,
                                     action="create_delayed",
                                     owner=params.falcon_user,
                                     mode=0755)
            if params.falcon_store_uri[0:4] == "file":
                Directory(params.falcon_store_uri[7:],
                          owner=params.falcon_user,
                          recursive=True)
            params.HdfsDirectory(None, action="create")
            Directory(params.falcon_local_dir,
                      owner=params.falcon_user,
                      recursive=True,
                      cd_access="a")
            if params.falcon_embeddedmq_enabled:
                Directory(os.path.abspath(
                    os.path.join(params.falcon_embeddedmq_data, "..")),
                          owner=params.falcon_user,
                          recursive=True)
                Directory(params.falcon_embeddedmq_data,
                          owner=params.falcon_user,
                          recursive=True)

        if action == 'start':
            Execute(
                format('{falcon_home}/bin/falcon-start -port {falcon_port}'),
                user=params.falcon_user,
                path=params.hadoop_bin_dir)
        if action == 'stop':
            Execute(format('{falcon_home}/bin/falcon-stop'),
                    user=params.falcon_user,
                    path=params.hadoop_bin_dir)
            File(params.server_pid_file, action='delete')
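
Note: the store_uri handling above dispatches on the URI scheme: hdfs:// paths go through HdfsDirectory, while file:// paths are stripped of their scheme prefix and created locally. A small sketch of that dispatch, with made-up sample URIs:

# Sketch of the store_uri scheme dispatch above, with sample URIs.
def storage_target(store_uri):
    if store_uri[0:4] == "hdfs":
        return ("hdfs", store_uri)       # would be created via HdfsDirectory
    if store_uri[0:4] == "file":
        return ("local", store_uri[7:])  # strip the 7-char "file://" prefix
    return ("unknown", store_uri)

print(storage_target("hdfs://nn:8020/apps/falcon/store"))  # ('hdfs', ...)
print(storage_target("file:///hadoop/falcon/store"))       # ('local', '/hadoop/falcon/store')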
Example #28
0
def hive(name=None):
    import params

    if name == 'hiveserver2':

        params.HdfsDirectory(params.hive_apps_whs_dir,
                             action="create_delayed",
                             owner=params.hive_user,
                             mode=0777)
        params.HdfsDirectory(params.hive_hdfs_user_dir,
                             action="create_delayed",
                             owner=params.hive_user,
                             mode=params.hive_hdfs_user_mode)
        params.HdfsDirectory(None, action="create")

    # We should change configurations for client as well as for server.
    # The reason is that stale-configs are service-level, not component-level.
    for conf_dir in params.hive_conf_dirs_list:
        fill_conf_dir(conf_dir)

    if name == 'metastore' or name == 'hiveserver2':
        jdbc_connector()

    environment = {"no_proxy": format("{ambari_server_hostname}")}

    cmd = format("/bin/sh -c 'cd /usr/lib/ambari-agent/ && curl -kf -x \"\" "
                 "--retry 5 "
                 "{jdk_location}{check_db_connection_jar_name} "
                 "-o {check_db_connection_jar_name}'")

    Execute(cmd,
            not_if=format("[ -f {check_db_connection_jar_name}]"),
            environment=environment)

    if name == 'metastore':
        File(params.start_metastore_path,
             mode=0755,
             content=StaticFile('startMetastore.sh'))
        if params.init_metastore_schema:
            create_schema_cmd = format(
                "export HIVE_CONF_DIR={hive_server_conf_dir} ; "
                "{hive_bin}/schematool -initSchema "
                "-dbType {hive_metastore_db_type} "
                "-userName {hive_metastore_user_name} "
                "-passWord {hive_metastore_user_passwd!p}")

            check_schema_created_cmd = format(
                "export HIVE_CONF_DIR={hive_server_conf_dir} ; "
                "{hive_bin}/schematool -info "
                "-dbType {hive_metastore_db_type} "
                "-userName {hive_metastore_user_name} "
                "-passWord {hive_metastore_user_passwd!p}")

            Execute(create_schema_cmd, not_if=check_schema_created_cmd)
    elif name == 'hiveserver2':
        File(params.start_hiveserver2_path,
             mode=0755,
             content=Template(format('{start_hiveserver2_script}')))

    if name != "client":
        crt_directory(params.hive_pid_dir)
        crt_directory(params.hive_log_dir)
        crt_directory(params.hive_var_lib)
Example #29
0
def webhcat():
  import params

  if params.hdp_stack_version != "" and compare_versions(params.hdp_stack_version, "2.2.0.0") < 0:
    params.HdfsDirectory(params.webhcat_apps_dir,
                         action="create_delayed",
                         owner=params.webhcat_user,
                         mode=0755
    )
  
  if params.hcat_hdfs_user_dir != params.webhcat_hdfs_user_dir:
    params.HdfsDirectory(params.hcat_hdfs_user_dir,
                         action="create_delayed",
                         owner=params.hcat_user,
                         mode=params.hcat_hdfs_user_mode
    )
  params.HdfsDirectory(params.webhcat_hdfs_user_dir,
                       action="create_delayed",
                       owner=params.webhcat_user,
                       mode=params.webhcat_hdfs_user_mode
  )
  params.HdfsDirectory(None, action="create")

  Directory(params.templeton_pid_dir,
            owner=params.webhcat_user,
            mode=0755,
            group=params.user_group,
            recursive=True)

  Directory(params.templeton_log_dir,
            owner=params.webhcat_user,
            mode=0755,
            group=params.user_group,
            recursive=True)

  Directory(params.config_dir,
            recursive=True,
            owner=params.webhcat_user,
            group=params.user_group)

  if params.security_enabled:
    kinit_if_needed = format("{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name};")
  else:
    kinit_if_needed = ""

  if kinit_if_needed:
    Execute(kinit_if_needed,
            user=params.webhcat_user,
            path='/bin'
    )

  # TODO, these checks that are specific to HDP 2.2 and greater should really be in a script specific to that stack.
  if params.hdp_stack_version != "" and compare_versions(params.hdp_stack_version, "2.2.0.0") >= 0:
    copy_tarballs_to_hdfs('hive', 'hive-webhcat', params.webhcat_user, params.hdfs_user, params.user_group)
    copy_tarballs_to_hdfs('pig', 'hive-webhcat', params.webhcat_user, params.hdfs_user, params.user_group)
    copy_tarballs_to_hdfs('hadoop-streaming', 'hive-webhcat', params.webhcat_user, params.hdfs_user, params.user_group)
    copy_tarballs_to_hdfs('sqoop', 'hive-webhcat', params.webhcat_user, params.hdfs_user, params.user_group)
  else:
    CopyFromLocal(params.hadoop_streeming_jars,
                  owner=params.webhcat_user,
                  mode=0755,
                  dest_dir=params.webhcat_apps_dir,
                  kinnit_if_needed=kinit_if_needed,
                  hdfs_user=params.hdfs_user,
                  hadoop_bin_dir=params.hadoop_bin_dir,
                  hadoop_conf_dir=params.hadoop_conf_dir
    )

    if os.path.isfile(params.pig_tar_file):
      CopyFromLocal(params.pig_tar_file,
                    owner=params.webhcat_user,
                    mode=0755,
                    dest_dir=params.webhcat_apps_dir,
                    kinnit_if_needed=kinit_if_needed,
                    hdfs_user=params.hdfs_user,
                    hadoop_bin_dir=params.hadoop_bin_dir,
                    hadoop_conf_dir=params.hadoop_conf_dir
      )

    CopyFromLocal(params.hive_tar_file,
                  owner=params.webhcat_user,
                  mode=0755,
                  dest_dir=params.webhcat_apps_dir,
                  kinnit_if_needed=kinit_if_needed,
                  hdfs_user=params.hdfs_user,
                  hadoop_bin_dir=params.hadoop_bin_dir,
                  hadoop_conf_dir=params.hadoop_conf_dir
    )

    if glob.glob(params.sqoop_tar_file):
      CopyFromLocal(params.sqoop_tar_file,
                    owner=params.webhcat_user,
                    mode=0755,
                    dest_dir=params.webhcat_apps_dir,
                    kinnit_if_needed=kinit_if_needed,
                    hdfs_user=params.hdfs_user,
                    hadoop_bin_dir=params.hadoop_bin_dir,
                    hadoop_conf_dir=params.hadoop_conf_dir
      )

  # Replace _HOST with hostname in relevant principal-related properties
  webhcat_site = params.config['configurations']['webhcat-site'].copy()
  for prop_name in ['templeton.hive.properties', 'templeton.kerberos.principal']:
    if prop_name in webhcat_site:
      webhcat_site[prop_name] = webhcat_site[prop_name].replace("_HOST", params.hostname)

  XmlConfig("webhcat-site.xml",
            conf_dir=params.config_dir,
            configurations=webhcat_site,
            configuration_attributes=params.config['configuration_attributes']['webhcat-site'],
            owner=params.webhcat_user,
            group=params.user_group,
            )

  File(format("{config_dir}/webhcat-env.sh"),
       owner=params.webhcat_user,
       group=params.user_group,
       content=InlineTemplate(params.webhcat_env_sh_template)
  )
  
  Directory(params.webhcat_conf_dir,
       cd_access='a',
       recursive=True
  )

  log4j_webhcat_filename = 'webhcat-log4j.properties'
  if params.log4j_webhcat_props is not None:
    File(format("{config_dir}/{log4j_webhcat_filename}"),
         mode=0644,
         group=params.user_group,
         owner=params.webhcat_user,
         content=params.log4j_webhcat_props
    )
  elif (os.path.exists("{config_dir}/{log4j_webhcat_filename}.template")):
    File(format("{config_dir}/{log4j_webhcat_filename}"),
         mode=0644,
         group=params.user_group,
         owner=params.webhcat_user,
         content=StaticFile(format("{config_dir}/{log4j_webhcat_filename}.template"))
    )
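
Note: the _HOST replacement loop in the example above rewrites Kerberos principal properties with the agent's hostname before webhcat-site.xml is rendered. A standalone sketch with sample data:

# Standalone sketch of the _HOST principal substitution above, with sample data.
hostname = 'worker-1.example.com'
webhcat_site = {
    'templeton.kerberos.principal': 'HTTP/_HOST@EXAMPLE.COM',
    'templeton.port': '50111',
}
for prop_name in ['templeton.hive.properties', 'templeton.kerberos.principal']:
    if prop_name in webhcat_site:
        webhcat_site[prop_name] = webhcat_site[prop_name].replace('_HOST', hostname)
# templeton.kerberos.principal is now 'HTTP/worker-1.example.com@EXAMPLE.COM'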
Example #30
0
def webhcat():
    import params

    params.HdfsDirectory(params.webhcat_apps_dir,
                         action="create_delayed",
                         owner=params.webhcat_user,
                         mode=0755)
    if params.hcat_hdfs_user_dir != params.webhcat_hdfs_user_dir:
        params.HdfsDirectory(params.hcat_hdfs_user_dir,
                             action="create_delayed",
                             owner=params.hcat_user,
                             mode=params.hcat_hdfs_user_mode)
    params.HdfsDirectory(params.webhcat_hdfs_user_dir,
                         action="create_delayed",
                         owner=params.webhcat_user,
                         mode=params.webhcat_hdfs_user_mode)
    params.HdfsDirectory(None, action="create")

    Directory(params.templeton_pid_dir,
              owner=params.webhcat_user,
              mode=0755,
              group=params.user_group,
              recursive=True)

    Directory(params.templeton_log_dir,
              owner=params.webhcat_user,
              mode=0755,
              group=params.user_group,
              recursive=True)

    Directory(params.config_dir,
              owner=params.webhcat_user,
              group=params.user_group)

    XmlConfig(
        "webhcat-site.xml",
        conf_dir=params.config_dir,
        configurations=params.config['configurations']['webhcat-site'],
        configuration_attributes=params.config['configuration_attributes']
        ['webhcat-site'],
        owner=params.webhcat_user,
        group=params.user_group,
    )

    File(format("{config_dir}/webhcat-env.sh"),
         owner=params.webhcat_user,
         group=params.user_group,
         content=InlineTemplate(params.webhcat_env_sh_template))

    if params.security_enabled:
        kinit_if_needed = format(
            "{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name};")
    else:
        kinit_if_needed = ""

    if kinit_if_needed:
        Execute(kinit_if_needed, user=params.webhcat_user, path='/bin')

    CopyFromLocal('/usr/lib/hadoop-mapreduce/hadoop-streaming-*.jar',
                  owner=params.webhcat_user,
                  mode=0755,
                  dest_dir=params.webhcat_apps_dir,
                  kinnit_if_needed=kinit_if_needed,
                  hdfs_user=params.hdfs_user)