def action(self):
        """Wipe the HDFS storage folders and re-format the namenode.

        The namenode and datanode directories are emptied over ssh (one
        parallel batch per role), then ``hdfs namenode -format -force`` is
        run on the first configured namenode host.

        Returns:
            The falsy result of ``Command.parallel`` when a cleanup batch
            fails; otherwise True if the format command exits with 0 and
            False if it does not.
        """
        logger.info('--> common.format_file_system <--')

        ssh_opts = '-o StrictHostKeyChecking=no -o ConnectTimeout=5'

        binary_dir = self.getClusterBinaryDir()

        # ---- clear hdfs files: namenode folders ----
        namenode_hosts = self.getHosts(roles=['namen', ])

        name_glob = os.path.join(
            self.getClusterHdfsDir(subdir=self.ys['roles']['namen']['dir']),
            '*')
        secondary_glob = os.path.join(
            self.getClusterHdfsDir(subdir=self.ys['roles']['namen']['sdir']),
            '*')

        wipe_namenodes = [
            "ssh {0} {2}@{1} -tt 'rm -rf {3} {4}' ".format(
                ssh_opts, node['ip'], node['usr'], name_glob, secondary_glob)
            for node in namenode_hosts
        ]

        outcome = Command.parallel(wipe_namenodes)
        if not outcome:
            return outcome

        # ---- clear hdfs files: datanode folders ----
        datanode_hosts = self.getHosts(roles=['datan', ])

        data_glob = os.path.join(
            self.getClusterHdfsDir(subdir=self.ys['roles']['datan']['dir']),
            '*')

        wipe_datanodes = [
            "ssh {0} {2}@{1} -tt 'rm -rf {3}' ".format(
                ssh_opts, node['ip'], node['usr'], data_glob)
            for node in datanode_hosts
        ]

        outcome = Command.parallel(wipe_datanodes)
        if not outcome:
            return outcome

        # ---- format: only the first namenode host is targeted ----
        format_cmd = "{0} namenode -format -force".format(
            os.path.join(binary_dir, 'bin/hdfs'))

        ssh_cmd = "ssh {0} {2}@{1} -tt '{3}' ".format(
            ssh_opts, self.ys['roles']['namen']['hosts'][0],
            self.ys['roles']['namen']['usr'], format_cmd)

        return Command.do(ssh_cmd) == 0
Пример #2
0
    def action(self):
        """Copy binary files from the control host to every cluster host.

        First the cluster binary directory is made world-writable via sudo
        on each host. Then, when parameters were supplied, each one is
        resolved by ``__parse`` into a (source, destination) pair that is
        scp'ed to every host; without parameters the remote binary
        directory is emptied and the whole local binary directory is
        copied.

        Returns:
            The result of the failing ``Command.parallel`` batch for the
            chmod step, otherwise the result of the copy batch.
        """
        logger.info('--> controlp.distribute_binary_package <--')

        ssh_opts = '-o StrictHostKeyChecking=no -o ConnectTimeout=5'

        targets = self.getHosts()

        local_binary_dir = self.getControlPBinaryDir()
        remote_binary_dir = self.getClusterBinaryDir()

        # chmod: each entry carries the sudo password next to the command
        chmod_jobs = [
            ("ssh {0} {2}@{1} -tt 'sudo -S chmod -R 777 {3}' ".format(
                ssh_opts, node['ip'], node['usr'], remote_binary_dir),
             node['pwd'])
            for node in targets
        ]

        outcome = Command.parallel(chmod_jobs)
        if not outcome:
            return outcome

        # sync binary files
        params = self.getParams()

        if len(params) == 0:
            # without params: replace the whole remote binary directory
            local_glob = os.path.join(local_binary_dir, '*')
            remote_glob = os.path.join(remote_binary_dir, '*')

            template = ("ssh {0} {2}@{1} -tt 'mkdir -p {4} && rm -rf {5}'"
                        " && scp -r {0} {3} {2}@{1}:{4}")
            sync_jobs = [
                template.format(ssh_opts, node['ip'], node['usr'],
                                local_glob, remote_binary_dir, remote_glob)
                for node in targets
            ]
        else:
            # with params: copy only the requested (source, dest) pairs
            pairs = [self.__parse(p) for p in params]

            sync_jobs = [
                ("scp -r {0} {3} {2}@{1}:{4}").format(
                    ssh_opts, node['ip'], node['usr'], pair[0], pair[1])
                for pair in pairs
                for node in targets
            ]

        return Command.parallel(sync_jobs)
Пример #3
0
    def action(self):
        """Stop the requested services, or all of them when none is given.

        Each parameter is translated by ``__parse`` into a remote command;
        parameters for which ``__parse`` returns None are skipped. The
        commands are sent over ssh in one parallel batch. When no
        parameter was supplied, ``/tmp/hsperfdata*`` is additionally
        removed via sudo on every host.

        Returns:
            The result of the last ``Command.parallel`` batch that ran; a
            falsy result from the stop batch is returned immediately.
        """
        logger.info('--> common.stop <--')

        ssh_option = '-o StrictHostKeyChecking=no -o ConnectTimeout=600'

        host_list = self.getHosts()
        rm_list = self.getHosts(roles=['resourcem', ])

        # -- step1: decide which services to stop
        params = self.getParams()

        # Remember the decision now and build a fresh list for the defaults.
        # Appending to the list returned by getParams() and calling
        # getParams() again later gives the wrong answer if that method
        # hands out the same list object each time.
        stop_everything = len(params) == 0
        if stop_everything:
            params = ['hdfs', 'yarn', 'jobhistory', 'timelineserver']

        # -- step2: build one ssh command per (service, host)
        each_host_services = []  # such as 'nodemanager' and datanode

        instructions = list()

        for p in params:
            # The remote command does not depend on the host: resolve once.
            remote_ins = self.__parse(p)
            if remote_ins is None:
                continue

            targets = host_list if p in each_host_services else rm_list

            for host in targets:
                # !!! do not use the -tt option
                ins = "ssh {0} {2}@{1} -T '{3}' ".format(
                    ssh_option, host['ip'], host['usr'], remote_ins)
                instructions.append(ins)

        ret = Command.parallel(instructions)
        if not ret:
            return ret

        # -- step3 : remove 'process information unavailable'
        if stop_everything:
            instructions = list()

            for host in host_list:
                ins = "ssh {0} {2}@{1} -tt 'sudo -S rm -rf /tmp/hsperfdata*'".format(
                    ssh_option, host['ip'], host['usr'])

                instructions.append((ins, host['pwd']))

            ret = Command.parallel(instructions)

        return ret
    def action(self):
        """Install the tools needed to compile on the control host.

        Runs the maven-mirror setup script, then the prerequisites
        installer through ``Command.sudo`` with the control host password.

        Returns:
            bool: True only if both scripts exit with 0.
        """
        logger.info('--> controlp.install_compilation_prerequisites <--')

        # The mirror script runs without elevated rights.
        if Command.do('./utilities/setup_aliyun_maven_mirror.sh') != 0:
            return False

        exit_code = Command.sudo(
            'sudo -S ./utilities/install_compilation_prerequisites.sh',
            self.ys['roles']['controlp']['pwd'])

        return exit_code == 0
Пример #5
0
    def action(self):
        """Run the clean-up command for each requested target on all hosts.

        With no parameters, the targets default to 'log' and 'tmp'. Each
        target is translated by ``__parse`` into a remote command that is
        sent to every host over ssh.

        Returns:
            The result of ``Command.parallel`` for the whole batch.
        """
        logger.info('--> common.clean <--')

        ssh_opts = '-o StrictHostKeyChecking=no -o ConnectTimeout=600'

        nodes = self.getHosts()
        # The value is not used below; the call is kept as it was.
        cluster_script_dir = self.getClusterScriptDir()

        targets = self.getParams()

        if len(targets) == 0:
            targets.extend(['log', 'tmp'])

        jobs = list()

        for target in targets:
            remote_cmd = self.__parse(target)

            # !!! do not use the -tt option
            jobs.extend(
                "ssh {0} {2}@{1} -T '{3}' ".format(
                    ssh_opts, node['ip'], node['usr'], remote_cmd)
                for node in nodes)

        return Command.parallel(jobs)
    def action(self):
        """Install the ganglia packages on the cluster hosts via apt-get.

        Every host gets the monitor-side packages; hosts with the
        'gmetad' role additionally get gmetad, the web frontend and
        rrdtool. All installs are submitted as one parallel batch.

        Returns:
            The falsy result of ``Command.parallel`` when the batch fails.
            NOTE(review): the code visible here has no explicit return on
            the success path -- confirm what callers expect.
        """
        logger.info('--> common.configure_ganglia_monitor <--')

        ssh_option = '-o StrictHostKeyChecking=no -o ConnectTimeout=5'

        host_list = self.getHosts()
        gmetad_list = self.getHosts(roles=[
            'gmetad',
        ])

        # Each entry is a (command, password) pair because the remote
        # command runs through `sudo -S`.
        instructions = list()

        # Monitor-side packages: installed on every host.
        for host in host_list:
            ins = "ssh {0} {2}@{1} -tt 'sudo -S apt-get install -y collectd-core ganglia-modules-linux ganglia-monitor ganglia-monitor-python libganglia1-dev libgmetric4j-java libjmxetric-java'".format(
                ssh_option, host['ip'], host['usr'])
            instructions.append((ins, host['pwd']))

        # Collector-side packages: only on hosts with the 'gmetad' role.
        for host in gmetad_list:
            ins = "ssh {0} {2}@{1} -tt 'sudo -S apt-get install -y gmetad ganglia-webfrontend rrdtool'".format(
                ssh_option, host['ip'], host['usr'])
            instructions.append((ins, host['pwd']))

        ret = Command.parallel(instructions)
        if not ret:
            return ret
        """
    def action(self):
        """Set up passphrase-less ssh from the control host to every host.

        Builds one ``./utilities/setup_passphraseless.sh`` invocation per
        host, passing ``usr@ip`` and the host password, and runs them as
        one parallel batch.

        Returns:
            The result of ``Command.parallel`` for the batch.
        """
        logger.info('--> controlp.setup_passphraseless <--')

        host_list = self.getHosts()

        instructions = list()
        for host in host_list:
            # setup passphraseless
            ins = "./utilities/setup_passphraseless.sh '%s@%s' '%s'" % (
                host['usr'], host['ip'], host['pwd'])
            instructions.append(ins)

        # The batch built above is `instructions`; the name `ins_list` used
        # here before was never defined and raised NameError.
        return Command.parallel(instructions)
Пример #8
0
    def action(self):
        """Run the initial full maven build of the source tree.

        Installs the hadoop-maven-plugins module first, then cleans,
        generates the eclipse project files and builds the distribution,
        all as a single ``&&``-chained shell command. If that chain fails,
        ``mvn package -DskipTests`` is run once and the action still
        reports failure.

        Returns:
            bool: True if the chained build command exits with 0.
        """
        logger.info('--> controlp.init_compile_src_code <--')

        source_root = self.getControlPSourceDir()
        maven_plugins_dir = self.getControlPSourceDir(
            subdir='hadoop-maven-plugins')

        # Left out on purpose:
        #   "mvn dependency-check:aggregate"  -- TODO, fix hanging
        #   "mvn package -Pdist,native,docs,src -DskipTests -Dtar"
        #     -- -Pdocs enforces doc format checks and triggers mvn errors.
        steps = [
            "free",
            "cd %s" % (maven_plugins_dir),
            "mvn install",
            "cd %s" % (source_root),
            "mvn clean",
            "mvn eclipse:eclipse -DdownloadSources=true -DdownloadJavadocs=true -DskipTests",
            "mvn clean install -Pdist,native -DskipTests -Dmaven.javadoc.skip=true -Dtar",
        ]

        if Command.do(" && ".join(steps)) == 0:
            return True

        # The result of this extra run is not inspected.
        Command.do("mvn package -DskipTests")
        return False
Пример #9
0
    def action(self):
        """Submit each parameter as a shell command on a random slave host.

        For every parameter a slave is picked with ``choice`` and the
        command is run there over ssh from the cluster binary directory.

        Returns:
            The result of ``Command.parallel`` for the whole batch.
        """
        logger.info('--> common.submit <--')

        ssh_opts = '-o StrictHostKeyChecking=no -o ConnectTimeout=600'

        workers = self.getSlaveHosts()

        jobs = list()

        for job_cmd in self.getParams():
            node = choice(workers)

            # !!! do not use the -tt option
            jobs.append(
                "ssh {0} {2}@{1} -T 'cd {3} && {4}' ".format(
                    ssh_opts, node['ip'], node['usr'],
                    self.getClusterBinaryDir(), job_cmd))

        return Command.parallel(jobs)
    def action(self):
        """Run change_binarycode_mode_own.sh via sudo on every host.

        The script from the cluster script directory is called with the
        configured group, user and the cluster binary directory.

        Returns:
            The result of ``Command.parallel`` for the batch.
        """
        logger.info('--> common.change_binarycode_mode_own <--')

        ssh_opts = '-o StrictHostKeyChecking=no -o ConnectTimeout=5'

        nodes = self.getHosts()

        binary_dir = self.getClusterBinaryDir()
        script_dir = self.getClusterScriptDir()

        script_path = os.path.join(script_dir,
                                   'change_binarycode_mode_own.sh')
        remote_cmd = "sudo -S {0} {1} {2} {3}".format(
            script_path, self.ys['opt']['group'], self.ys['opt']['user'],
            binary_dir)

        # (command, password) pairs: the remote command needs sudo.
        jobs = [
            ("ssh {0} {2}@{1} -tt '{3}' ".format(
                ssh_opts, node['ip'], node['usr'], remote_cmd),
             node['pwd'])
            for node in nodes
        ]

        return Command.parallel(jobs)
Пример #11
0
    def action(self):
        """Download the Hadoop release tarball and unpack it.

        The control host binary directory is created when missing. The
        tarball for the configured version is streamed with curl into
        ``tar``, which extracts into the parent of that directory.

        Returns:
            bool: False if the binary path is not a directory or the
            download pipeline exits non-zero; True otherwise.
        """
        logger.info('--> controlp.download_bin_code <--')

        target_dir = self.getControlPBinaryDir()

        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

        if not os.path.isdir(target_dir):
            logger.error(
                "'binary code' does not indicate a folder in setting file.")
            return False

        url = "http://www-eu.apache.org/dist/hadoop/common/hadoop-{0}/hadoop-{0}.tar.gz".format(
            self.ys['version'])

        # TODO, only exclude files
        unpack_root = os.path.join(target_dir, '../')
        fetch_cmd = "curl -sSL {0} | tar -C {1} -xzv".format(url, unpack_root)

        return Command.do(fetch_cmd) == 0
Пример #12
0
    def action(self):
        """Run install_runtime_prerequisites.sh via sudo on every host.

        Builds the master and slave runtime environment by calling the
        script from the cluster script directory over ssh.

        Returns:
            The result of ``Command.parallel`` for the batch.
        """
        logger.info('--> common.install_runtime_prerequisties <--')

        ssh_opts = '-o StrictHostKeyChecking=no -o ConnectTimeout=5'

        nodes = self.getHosts()

        installer = os.path.join(self.getClusterScriptDir(),
                                 'install_runtime_prerequisites.sh')

        # (command, password) pairs: the remote command needs sudo.
        jobs = [
            ("ssh {0} {2}@{1} -tt 'sudo -S {3}'".format(
                ssh_opts, node['ip'], node['usr'], installer),
             node['pwd'])
            for node in nodes
        ]

        return Command.parallel(jobs)
    def action(self):
        """Build the requested source directories with maven.

        Each parameter is resolved by ``__parse`` into a directory; with
        no parameters the control host source directory is built. One
        ``cd <dir> && mvn clean install ...`` command is issued per
        directory, all in one parallel batch.

        Returns:
            The result of ``Command.parallel`` for the batch.
        """
        logger.info('--> controlp.compile_src_code <--')

        source_root = self.getControlPSourceDir()

        build_dirs = [self.__parse(p) for p in self.getParams()]
        if len(build_dirs) == 0:
            build_dirs.append(source_root)

        mvn_build = ("mvn clean install -Pdist,native -DskipTests "
                     "-Dmaven.javadoc.skip=true -Dtar")

        jobs = [
            " && ".join(["cd %s" % (build_dir), mvn_build])
            for build_dir in build_dirs
        ]

        return Command.parallel(jobs)
Пример #14
0
    def action(self):
        """Prepare every host and push binaries, scripts and configs to it.

        Commands run one at a time; the first one that exits non-zero is
        logged as an error and aborts the action. Steps, in order:

        1. sudo mkdir / chown / chmod of the ``binarycode`` directory on
           every host.
        2. Create the namenode and datanode storage folders.
        3. Copy the control host binaries and ``./utilities/*``.
        4. Run ``setup_passphraseless.sh`` on the namenode and resource
           manager hosts, passing their datanodes / nodemanagers.
        5. Copy ``./configs/*.xml`` and ``./configs/workers``.
        6. Run ``wait``.

        Returns:
            bool: True when every command returned 0, False otherwise.
        """
        logger.info('--> controlp.distribute_binary_package<--')

        ssh_opts = '-o StrictHostKeyChecking=no -o ConnectTimeout=5'

        def run_logged(runner, ins, *extra):
            # Run one command, log it with its exit code, report success.
            retcode = runner(ins, *extra)

            logger.info("ins: %s; retcode: %d." % (ins, retcode))

            if retcode != 0:
                logger.error(ins)
                return False
            return True

        all_hosts = self.getHosts()

        sourcecode = self.ys['sourcecode']  # read but not used below
        binarycode = self.ys['binarycode']

        # ---- add permissions: create folders, chown, chmod ----
        sudo_templates = (
            "ssh {0} {2}@{1} -tt 'sudo -S mkdir -p {3}' ",
            "ssh {0} {2}@{1} -tt 'sudo -S chown -R {2} {3}' ",
            "ssh {0} {2}@{1} -tt 'sudo -S chmod -R 777 {3}' ",
        )
        for node in all_hosts:
            for template in sudo_templates:
                ins = template.format(
                    ssh_opts, node['ip'], node['usr'], binarycode)
                if not run_logged(cmd.sudo, ins, node['pwd']):
                    return False

        # ---- create hdfs folders: namenode ----
        namenode_hosts = self.getHosts(roles=[
            'namen',
        ])

        name_dir = os.path.join(binarycode, self.ys['roles']['namen']['dir'])
        secondary_dir = os.path.join(binarycode,
                                     self.ys['roles']['namen']['sdir'])

        for node in namenode_hosts:
            ins = "ssh {0} {2}@{1} -tt 'mkdir -p {3} {4}' ".format(
                ssh_opts, node['ip'], node['usr'], name_dir, secondary_dir)
            if not run_logged(cmd.do, ins):
                return False

        # ---- create hdfs folders: datanodes ----
        datanode_hosts = self.getHosts(roles=[
            'datan',
        ])

        data_dir = os.path.join(binarycode, self.ys['roles']['datan']['dir'])

        for node in datanode_hosts:
            ins = "ssh {0} {2}@{1} -tt 'mkdir -p {3}' ".format(
                ssh_opts, node['ip'], node['usr'], data_dir)
            if not run_logged(cmd.do, ins):
                return False

        # ---- binary code ----
        local_binaries = os.path.join(self.getControlPBinaryFolder(), '*')
        remote_binaries = os.path.join(binarycode, 'rose-on-yarn/')

        for node in all_hosts:
            ins = "ssh {0} {2}@{1} -tt 'mkdir -p {4} && rm -rf {4}/*' && scp -r {0} {3} {2}@{1}:{4}".format(
                ssh_opts, node['ip'], node['usr'], local_binaries,
                remote_binaries)
            if not run_logged(cmd.do, ins):
                return False

        # ---- scripts about building env ----
        local_scripts = './utilities/*'
        remote_scripts = os.path.join(binarycode, 'scripts/')

        for node in all_hosts:
            ins = "ssh {0} {2}@{1} -tt 'mkdir -p {4}' && scp -r {0} {3} {2}@{1}:{4} ".format(
                ssh_opts, node['ip'], node['usr'], local_scripts,
                remote_scripts)
            if not run_logged(cmd.do, ins):
                return False

        # ---- config setup_passphraseless from master to slaves ----
        passphraseless_script = os.path.join(remote_scripts,
                                             'setup_passphraseless.sh')

        # hdfs: namenode -> datanodes
        hdfs_masters = self.getHosts(roles=[
            'namen',
        ])
        hdfs_slaves = self.getHosts(roles=[
            'datan',
        ])

        hdfs_slave_logins = [
            "%s@%s" % (node['usr'], node['ip']) for node in hdfs_slaves
        ]

        for node in hdfs_masters:
            ins = "ssh {0} {2}@{1} -tt '{3} \'{4}\' \'{5}\'' ".format(
                ssh_opts, node['ip'], node['usr'], passphraseless_script,
                ",".join(hdfs_slave_logins), self.ys['roles']['datan']['pwd'])
            if not run_logged(cmd.do, ins):
                return False

        # yarn: resourcemanager -> nodemanagers
        yarn_masters = self.getHosts(roles=[
            'resourcem',
        ])
        yarn_slaves = self.getHosts(roles=[
            'nodem',
        ])

        yarn_slave_logins = [
            "%s@%s" % (node['usr'], node['ip']) for node in yarn_slaves
        ]

        for node in yarn_masters:
            ins = "ssh {0} {2}@{1} -tt '{3} \'{4}\' \'{5}\''".format(
                ssh_opts, node['ip'], node['usr'], passphraseless_script,
                ",".join(yarn_slave_logins),
                self.ys['roles']['nodem']['pwd'])
            if not run_logged(cmd.do, ins):
                return False

        # ---- configs ----
        local_configs = './configs/*.xml ./configs/workers'
        remote_configs = os.path.join(binarycode,
                                      'rose-on-yarn/etc/hadoop/')

        for node in all_hosts:
            ins = "scp {0} {2}@{1}:{3} ".format(local_configs, node['ip'],
                                                node['usr'], remote_configs)
            if not run_logged(cmd.do, ins):
                return False

        # ---- wait to end (no info log for this step) ----
        ins = 'wait'
        if cmd.do(ins) != 0:
            logger.error(ins)
            return False

        return True
Пример #15
0
    def action(self):
        """Prepare all hosts before the binary package is distributed.

        Steps, in order:

        1. Via sudo on every host: remove everything under the cluster
           base directory except entries matching the hdfs directory,
           create the binary directory, then chown and chmod the base
           directory. The results of these four batches are not checked.
        2. Create the namenode and datanode storage folders.
        3. Copy ``./utilities/*`` into the cluster script directory.
        4. Run ``setup_passphraseless.sh`` on the namenode and resource
           manager hosts, passing their datanodes / nodemanagers.

        Returns:
            The falsy result of the first failing batch among steps 2-3,
            otherwise the result of the step 4 batch.
        """
        logger.info('--> controlp.distribute_binary_package_prep <--')

        ssh_opts = '-o StrictHostKeyChecking=no -o ConnectTimeout=5'

        all_hosts = self.getHosts()

        script_dir = self.getClusterScriptDir()
        binary_dir = self.getClusterBinaryDir()
        hdfs_dir = self.getClusterHdfsDir()
        base_dir = self.getClusterBaseDir()

        def sudo_on_all(template, *args):
            # One (command, password) pair per host, run as a single batch.
            # The batch result is intentionally not inspected.
            Command.parallel([
                (template.format(ssh_opts, node['ip'], node['usr'], *args),
                 node['pwd'])
                for node in all_hosts
            ])

        # ---- clear cluster base dir (keeps what matches the hdfs dir) ----
        sudo_on_all(
            "ssh {0} {2}@{1} -tt 'sudo -S rm -rf `ls {3}/* | egrep -v {4}` ' ",
            base_dir, hdfs_dir)

        # ---- add permissions: create folders, chown, chmod ----
        sudo_on_all("ssh {0} {2}@{1} -tt 'sudo -S mkdir -p {3}' ", binary_dir)
        sudo_on_all("ssh {0} {2}@{1} -tt 'sudo -S chown -R {2} {3}' ",
                    base_dir)
        sudo_on_all("ssh {0} {2}@{1} -tt 'sudo -S chmod -R 777 {3}' ",
                    base_dir)

        # ---- create hdfs folders ----
        # folders for namenode
        namenode_hosts = self.getHosts(roles=[
            'namen',
        ])

        name_dir = self.getClusterHdfsDir(
            subdir=self.ys['roles']['namen']['dir'])
        secondary_dir = self.getClusterHdfsDir(
            subdir=self.ys['roles']['namen']['sdir'])

        mkdir_jobs = [
            "ssh {0} {2}@{1} -tt 'mkdir -p {3} {4}' ".format(
                ssh_opts, node['ip'], node['usr'], name_dir, secondary_dir)
            for node in namenode_hosts
        ]

        # folders for datanodes
        datanode_hosts = self.getHosts(roles=[
            'datan',
        ])

        data_dir = self.getClusterHdfsDir(
            subdir=self.ys['roles']['datan']['dir'])

        mkdir_jobs.extend(
            "ssh {0} {2}@{1} -tt 'mkdir -p {3}' ".format(
                ssh_opts, node['ip'], node['usr'], data_dir)
            for node in datanode_hosts)

        outcome = Command.parallel(mkdir_jobs)
        if not outcome:
            return outcome

        # ---- scripts about building env ----
        local_utilities = './utilities/*'

        copy_template = ("ssh {0} {2}@{1} -tt 'mkdir -p {4}' "
                         "&& scp -r {0} {3} {2}@{1}:{4} ")
        copy_jobs = [
            copy_template.format(ssh_opts, node['ip'], node['usr'],
                                 local_utilities, script_dir)
            for node in all_hosts
        ]

        outcome = Command.parallel(copy_jobs)
        if not outcome:
            return outcome

        # ---- config setup_passphraseless from master to slaves ----
        passphraseless_script = os.path.join(script_dir,
                                             'setup_passphraseless.sh')

        # hdfs: namenode -> datanodes
        hdfs_masters = self.getHosts(roles=[
            'namen',
        ])
        hdfs_slaves = self.getHosts(roles=[
            'datan',
        ])

        hdfs_slave_logins = ",".join(
            ["%s@%s" % (node['usr'], node['ip']) for node in hdfs_slaves])

        trust_jobs = [
            "ssh {0} {2}@{1} -tt '{3} \'{4}\' \'{5}\'' ".format(
                ssh_opts, node['ip'], node['usr'], passphraseless_script,
                hdfs_slave_logins, self.ys['roles']['datan']['pwd'])
            for node in hdfs_masters
        ]

        # yarn: resourcemanager -> nodemanagers
        yarn_masters = self.getHosts(roles=[
            'resourcem',
        ])
        yarn_slaves = self.getHosts(roles=[
            'nodem',
        ])

        yarn_slave_logins = ",".join(
            ["%s@%s" % (node['usr'], node['ip']) for node in yarn_slaves])

        trust_jobs.extend(
            "ssh {0} {2}@{1} -tt '{3} \'{4}\' \'{5}\''".format(
                ssh_opts, node['ip'], node['usr'], passphraseless_script,
                yarn_slave_logins, self.ys['roles']['nodem']['pwd'])
            for node in yarn_masters)

        return Command.parallel(trust_jobs)
Пример #16
0
    def action(self):
        logger.info('--> common.configure_site <--')

        ssh_option = '-o StrictHostKeyChecking=no -o ConnectTimeout=5'

        cluster_hadoop_lib_native = self.getClusterHadoopLibNativeDir()
        cluster_hadoop_conf_dir = self.getClusterHadoopConfDir()
        cluster_binary_dir = self.getClusterBinaryDir()
        cluster_hdfs_dir = self.getClusterHdfsDir()
        cluster_log_dir = self.getClusterLogDir()

        #
        # wirte slaves' ip into workers
        #
        slaves_list = self.getSlaveHosts()

        workers = open('./configs/workers', 'w')
        for host in slaves_list:
            workers.write("%s \n" % (host['ip']))
        workers.close()

        #
        # configure *-site.xml
        #
        shutil.copy2('./configs/default/hadoop-core.xml',
                     './configs/core-site.xml')
        shutil.copy2('./configs/default/hadoop-hdfs.xml',
                     './configs/hdfs-site.xml')
        shutil.copy2('./configs/default/hadoop-yarn.xml',
                     './configs/yarn-site.xml')
        shutil.copy2('./configs/default/hadoop-mapred.xml',
                     './configs/mapred-site.xml')

        # log-level
        putconfig(file='./configs/mapred-site.xml',
                  name='mapreduce.map.log.level',
                  value='DEBUG')

        putconfig(file='./configs/mapred-site.xml',
                  name='mapreduce.reduce.log.level',
                  value='DEBUG')

        putconfig(file='./configs/mapred-site.xml',
                  name='yarn.app.mapreduce.am.log.level',
                  value='DEBUG')

        # hdfs
        putconfig(file='./configs/core-site.xml',
                  name='fs.defaultFS',
                  value="hdfs://%s:9000" %
                  self.ys['roles']['namen']['hosts'][0])

        putconfig(file='./configs/hdfs-site.xml',
                  name='dfs.replication',
                  value='3')

        putconfig(file='./configs/hdfs-site.xml',
                  name='dfs.namenode.name.dir',
                  value=os.path.join('file:', cluster_hdfs_dir,
                                     self.ys['roles']['namen']['dir']))

        putconfig(file='./configs/hdfs-site.xml',
                  name='dfs.namenode.checkpoint.dir',
                  value=os.path.join('file:', cluster_hdfs_dir,
                                     self.ys['roles']['namen']['sdir']))

        putconfig(file='./configs/hdfs-site.xml',
                  name='dfs.namenode.checkpoint.edits.dir',
                  value=os.path.join('file:', cluster_hdfs_dir,
                                     self.ys['roles']['namen']['sdir']))

        putconfig(file='./configs/hdfs-site.xml',
                  name='dfs.datanode.data.dir',
                  value=os.path.join('file:', cluster_hdfs_dir,
                                     self.ys['roles']['datan']['dir']))

        # mapreduce
        putconfig(file='./configs/mapred-site.xml',
                  name='mapreduce.task.timeout',
                  value='300000')

        putconfig(file='./configs/mapred-site.xml',
                  name='mapreduce.map.memory.mb',
                  value='1536')

        putconfig(file='./configs/mapred-site.xml',
                  name='mapreduce.map.cpu.vcores',
                  value='1')

        putconfig(file='./configs/mapred-site.xml',
                  name='mapreduce.reduce.memory.mb',
                  value='2048')

        putconfig(file='./configs/mapred-site.xml',
                  name='mapreduce.reduce.cpu.vcores',
                  value='1')

        putconfig(file='./configs/mapred-site.xml',
                  name='mapreduce.framework.name',
                  value='yarn')

        putconfig(file='./configs/yarn-site.xml',
                  name='yarn.nodemanager.aux-services',
                  value='mapreduce_shuffle')

        putconfig(
            file='./configs/yarn-site.xml',
            name='yarn.nodemanager.env-whitelist',
            value=
            'JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME'
        )

        # yarn
        putconfig(file='./configs/yarn-site.xml',
                  name='yarn.resourcemanager.hostname',
                  value=self.ys['roles']['resourcem']['hosts'][0])

        putconfig(file='./configs/mapred-site.xml',
                  name='yarn.app.mapreduce.am.scheduler.heartbeat.interval-ms',
                  value='3000')

        putconfig(
            file='./configs/yarn-site.xml',
            name='yarn.resourcemanager.nodemanagers.heartbeat-interval-ms',
            value='3000')

        putconfig(file='./configs/yarn-site.xml',
                  name='yarn.webapp.ui2.enable',
                  value='false')

        putconfig(
            file='./configs/yarn-site.xml',
            name='yarn.nodemanager.resource.detect-hardware-capabilities',
            value='true')

        putconfig(file='./configs/yarn-site.xml',
                  name='yarn.scheduler.minimum-allocation-mb',
                  value='512')

        putconfig(file='./configs/yarn-site.xml',
                  name='yarn.nodemanager.recovery.enabled',
                  value='true')

        putconfig(file='./configs/yarn-site.xml',
                  name='yarn.nodemanager.address',
                  value='${yarn.nodemanager.hostname}:45678')

        putconfig(file='./configs/yarn-site.xml',
                  name='yarn.nodemanager.recovery.supervised',
                  value='true')

        # ROSE: yarn->webapp
        putconfig(file='./configs/yarn-site.xml',
                  name='yarn.resourcemanager.webapp.rrds.dir.cluster',
                  value=self.ys['gmetad']['rrds']['dir'])

        # -- logs and tmp
        putconfig(file='./configs/core-site.xml',
                  name='hadoop.tmp.dir',
                  value=self.getClusterTmpDir())

        putconfig(file='./configs/yarn-site.xml',
                  name='yarn.nodemanager.delete.debug-delay-sec',
                  value='86400')  # 86400sec = 1day

        putconfig(file='./configs/yarn-site.xml',
                  name='yarn.nodemanager.log.retain-seconds',
                  value='86400')  # 86400sec = 1day

        putconfig(file='./configs/yarn-site.xml',
                  name='yarn.log-aggregation-enable',
                  value='true')

        putconfig(file='./configs/yarn-site.xml',
                  name='yarn.nodemanager.remote-app-log-dir',
                  value=self.getClusterLogDir(subdir='remote-app-logs'))

        # jobhistory
        putconfig(file='./configs/mapred-site.xml',
                  name='mapreduce.jobhistory.address',
                  value="%s:10020" % self.ys['roles']['resourcem']['hosts'][0])

        putconfig(file='./configs/mapred-site.xml',
                  name='mapreduce.jobhistory.webapp.address',
                  value="%s:19888" % self.ys['roles']['resourcem']['hosts'][0])

        putconfig(file='./configs/mapred-site.xml',
                  name='mapreduce.jobhistory.webapp.https.address',
                  value="%s:19890" % self.ys['roles']['resourcem']['hosts'][0])

        putconfig(file='./configs/mapred-site.xml',
                  name='mapreduce.jobhistory.admin.address',
                  value="%s:10033" % self.ys['roles']['resourcem']['hosts'][0])

        # -- timeline service
        putconfig(file='./configs/yarn-site.xml',
                  name='yarn.timeline-service.enabled',
                  value='true')  # todo. configue timeline

        putconfig(file='./configs/yarn-site.xml',
                  name='yarn.timeline-service.version',
                  value='1.0f')  # 1.0f 1.5f

        putconfig(file='./configs/yarn-site.xml',
                  name='yarn.system-metrics-publisher.enabled',
                  value='true')

        putconfig(
            file='./configs/yarn-site.xml',
            name='yarn.timeline-service.generic-application-history.enabled',
            value='true')

        putconfig(
            file='./configs/yarn-site.xml',
            name=
            'yarn.timeline-service.leveldb-timeline-store.ttl-interval-ms ',
            value='60000')  # ms

        putconfig(file='./configs/yarn-site.xml',
                  name='yarn.timeline-service.hostname',
                  value='${yarn.resourcemanager.hostname}')

        putconfig(file='./configs/yarn-site.xml',
                  name='yarn.timeline-service.recovery.enabled',
                  value='true')

        putconfig(file='./configs/yarn-site.xml',
                  name='yarn.timeline-service.ttl-enable',
                  value='true')

        putconfig(file='./configs/yarn-site.xml',
                  name='yarn.timeline-service.ttl-ms',
                  value='86400000')  # 86400000ms = 1day

        # yarn-support opportunistic container scheduler
        putconfig(
            file='./configs/yarn-site.xml',
            name=
            'yarn.resourcemanager.opportunistic-container-allocation.enabled',
            value='true')

        putconfig(
            file='./configs/yarn-site.xml',
            name='yarn.nodemanager.opportunistic-containers-max-queue-length',
            value='20')

        putconfig(file='./configs/yarn-site.xml',
                  name='yarn.nodemanager.container-monitor.interval-ms',
                  value='3000')

        putconfig(file='./configs/yarn-site.xml',
                  name='yarn.nodemanager.health-checker.interval-ms',
                  value='60000')

        # yarn-support distributed scheduler
        putconfig(file='./configs/yarn-site.xml',
                  name='yarn.nodemanager.distributed-scheduling.enabled',
                  value='true')

        putconfig(file='./configs/yarn-site.xml',
                  name='yarn.nodemanager.amrmproxy.enabled',
                  value='true')

        putconfig(file='./configs/yarn-site.xml',
                  name='yarn.nodemanager.amrmproxy.address',
                  value='0.0.0.0:8049')

        putconfig(file='./configs/yarn-site.xml',
                  name='yarn.nodemanager.amrmproxy.client.thread-count',
                  value='3')

        putconfig(
            file='./configs/yarn-site.xml',
            name='yarn.resourcemanager.scheduler.address',
            #value="%s:8030" % self.ys['roles']['resourcem']['hosts'][0])
            value='0.0.0.0:8049')  # on RM, must change it into rm-ip:8030

        putconfig(
            file='./configs/yarn-site.xml',
            name='yarn.nodemanager.amrmproxy.realrm.scheduler.address',
            value="%s:8030" % self.ys['roles']['resourcem']['hosts'][0],
            description=
            "SUNXY-ROSE: targets to help AMRMProxy find real RM scheduler address"
        )
        # ROSE
        putconfig(
            file='./configs/yarn-site.xml',
            name='yarn.rose.enabled',
            value='true',
            description=
            "SUNXY-ROSE: targets to manage opportunistic containers as an overselling method"
        )

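        # ROSE: distributed scheduling support. The shared yarn-site.xml points
        # yarn.resourcemanager.scheduler.address at the AMRMProxy (0.0.0.0:8049);
        # the copy made for the ResourceManager must use the real rm-ip:8030.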
        shutil.copy2('./configs/yarn-site.xml', './configs/yarn-rm-site.xml')

        putconfig(file='./configs/yarn-rm-site.xml',
                  name='yarn.resourcemanager.scheduler.address',
                  value="%s:8030" % self.ys['roles']['resourcem']['hosts'][0])

        files = [
            './configs/core-site.xml', './configs/hdfs-site.xml',
            './configs/mapred-site.xml', './configs/yarn-site.xml',
            './configs/yarn-rm-site.xml'
        ]

        ins = " && ".join(map(lambda x: "format_file %s" % x, files))

        retcode = Command.do(ins)

        logger.info("ins: %s; retcode: %d." % (ins, retcode))

        if retcode != 0:
            logger.error(ins)
            return False

        #
        # configure ./etc/hadoop/*.sh
        #
        shutil.copy2('./configs/default/hadoop-env.sh',
                     './configs/hadoop-env.sh')
        hadoop_env_file = './configs/hadoop-env.sh'

        envlist = [
            ['PDSH_RCMD_TYPE', 'ssh'],
            ['JAVA_HOME', '/usr/lib/jvm/java-8-openjdk-amd64/'],
            ['HADOOP_HOME', cluster_binary_dir],
            ['HADOOP_YARN_HOME', cluster_binary_dir],
            ['HADOOP_HDFS_HOME', cluster_binary_dir],
            ['HADOOP_MAPRED_HOME', cluster_binary_dir],
            ['HADOOP_COMMON_HOME', cluster_binary_dir],
            ['HADOOP_COMMON_LIB_NATIVE_DIR', cluster_hadoop_lib_native],
            [
                'HADOOP_OPTS',
                "'\"${HADOOP_OPTS} -Djava.library.path=%s\"'" %
                (cluster_hadoop_lib_native)
            ],
            ['HADOOP_CONF_DIR', cluster_hadoop_conf_dir],
            ['HADOOP_LOG_DIR', cluster_log_dir],  # custom
            ['HADOOP_ROOT_LOGGER', 'DEBUG,console,RFA'],  # DEBUG mode custom
            ['HADOOP_DAEMON_ROOT_LOGGER',
             'DEBUG,console,RFA'],  # DEBUG mode custom
            ['HADOOP_SECURITY_LOGGER',
             'DEBUG,console,RFA'],  # DEBUG mode custom
            # ['YARN_CONF_DIR', cluster_hadoop_conf_dir],        # Deprecated
            # ['YARN_ROOT_LOGGER', 'DEBUG,console,RFA'],         # Deprecated
        ]

        ins = " && ".join(
            map(
                lambda x:
                "put_config_line --file %s --property %s --value %s --prefix 'export' "
                % (hadoop_env_file, x[0], x[1]), envlist))

        retcode = Command.do(ins)

        logger.info("ins: %s; retcode: %d." % (ins, retcode))

        if retcode != 0:
            logger.error(ins)
            return False

        #
        # configure ./etc/hadoop/hadoop-metrics2.properties
        #
        shutil.copy2('./configs/default/hadoop-metrics2.properties',
                     './configs/hadoop-metrics2.properties')
        hadoop_metrics_file = './configs/hadoop-metrics2.properties'
        gmond_host = self.ys['gmond']['host']

        envlist = [
            # ['jobhistoryserver.sink.ganglia.servers', gmond_host],
            # ['mrappmaster.sink.ganglia.servers', gmond_host],
            ['nodemanager.sink.ganglia.servers', gmond_host],
            ['resourcemanager.sink.ganglia.servers', gmond_host],
            # ['datanode.sink.ganglia.servers', gmond_host],
            # ['namenode.sink.ganglia.servers', gmond_host],
            # ['datanode.sink.file.filename', 'datanode-metrics.out'],
            [
                'resourcemanager.sink.file.filename',
                'resourcemanager-metrics.out'
            ],
            ['nodemanager.sink.file.filename', 'nodemanager-metrics.out'],
            # ['mrappmaster.sink.file.filename', 'mrappmaster-metrics.out'],
            # ['jobhistoryserver.sink.file.filename', 'jobhistoryserver-metrics.out'],
            [
                'nodemanager.sink.file_jvm.class',
                'org.apache.hadoop.metrics2.sink.FileSink'
            ],
            ['nodemanager.sink.file_jvm.context', 'jvm'],
            [
                'nodemanager.sink.file_jvm.filename',
                'nodemanager-jvm-metrics.out'
            ],
            [
                'nodemanager.sink.file_mapred.class',
                'org.apache.hadoop.metrics2.sink.FileSink'
            ],
            ['nodemanager.sink.file_mapred.context', 'mapred'],
            [
                'nodemanager.sink.file_mapred.filename',
                'nodemanager-mapred-metrics.out'
            ],
            [
                '*.sink.ganglia.class',
                'org.apache.hadoop.metrics2.sink.ganglia.GangliaSink31'
            ],
            ['*.sink.ganglia.period', '10'],
            ['*.sink.ganglia.supportsparse', 'true'],
            [
                '*.sink.ganglia.slope',
                'jvm.metrics.gcCount=zero,jvm.metrics.memHeapUsedM=both'
            ],
            [
                '*.sink.ganglia.dmax',
                'jvm.metrics.threadsBlocked=70,jvm.metrics.memHeapUsedM=40'
            ],
        ]

        ins = " && ".join(
            map(
                lambda x: "put_config_line --file %s --property %s --value %s "
                % (hadoop_metrics_file, x[0], x[1]), envlist))

        retcode = Command.do(ins)

        logger.info("ins: %s; retcode: %d." % (ins, retcode))

        if retcode != 0:
            logger.error(ins)
            return False

        #
        # sync the generated configuration files to the cluster hosts
        #
        host_list = self.getHosts()
        rm_list = self.getHosts(roles=[
            'resourcem',
        ])
        """ chmod """
        instructions = list()
        for host in host_list:
            ins = "ssh {0} {2}@{1} -tt 'sudo -S chmod -R 777 {3}' ".format(
                ssh_option, host['ip'], host['usr'], cluster_hadoop_conf_dir)

            instructions.append((ins, host['pwd']))

        ret = Command.parallel(instructions)
        if not ret:
            return ret
        """ sync files to all nodes """
        hbe_configs = './configs/hdfs-site.xml ./configs/mapred-site.xml \
                           ./configs/yarn-site.xml ./configs/core-site.xml \
                           ./configs/workers ./configs/hadoop-env.sh \
                           ./configs/hadoop-metrics2.properties'

        instructions = list()
        for host in host_list:
            ins = "ssh {1}@{0} -tt 'mkdir -p {3}' && scp {2} {1}@{0}:{3} ".format(
                host['ip'], host['usr'], hbe_configs, cluster_hadoop_conf_dir)

            instructions.append(ins)

        ret = Command.parallel(instructions)
        if not ret:
            return ret
        """ sync files to RMs """
        instructions = list()
        for host in rm_list:
            ins = "ssh {1}@{0} -tt 'mkdir -p {3}' && scp ./configs/yarn-rm-site.xml {1}@{0}:{2}".format(
                host['ip'], host['usr'],
                os.path.join(cluster_hadoop_conf_dir, 'yarn-site.xml'),
                cluster_hadoop_conf_dir)

            instructions.append(ins)

        return Command.parallel(instructions)