Python warn示例，execo_engine.logger.warn Python示例

示例#1

0

显示文件

    def __init__(self, jar_path, params=None, lib_paths=None):
        """Creates a new Hadoop MapReduce jar job with the given parameters.

        Args:
          jar_path (str):
            The local path of the jar containing the job.
          params (list of str, optional):
            The list of parameters of the job.
          lib_paths (list of str, optional):
            The list of local paths to the libraries used by the job.

        Raises:
          HadoopJobException:
            If the jar file or any of the libraries does not exist locally.
        """

        params = params or []
        lib_paths = lib_paths or []

        # Check if the jar file exists
        if not os.path.exists(jar_path):
            message = "Jar file " + jar_path + " does not exist"
            logger.error(message)
            raise HadoopJobException(message)

        # Check if the libraries exist. A bare ``return`` here would hand the
        # caller an instance without jar_path/params/lib_paths attributes, so
        # fail loudly in the same way as for a missing jar.
        for lp in lib_paths:
            if not os.path.exists(lp):
                message = "Lib file " + lp + " does not exist"
                logger.error(message)
                raise HadoopJobException(message)

        self.jar_path = jar_path
        self.params = params
        self.lib_paths = lib_paths

示例#2

0

显示文件

文件： objects.py 项目： mliroz/bigdata_dpy

    def __init__(self, jar_path, params=None, lib_paths=None):
        """Creates a new Hadoop MapReduce jar job with the given parameters.

        Args:
          jar_path (str):
            The local path of the jar containing the job.
          params (list of str, optional):
            The list of parameters of the job.
          lib_paths (list of str, optional):
            The list of local paths to the libraries used by the job.

        Raises:
          HadoopJobException:
            If the jar file or any of the libraries does not exist locally.
        """

        params = params or []
        lib_paths = lib_paths or []

        # Check if the jar file exists
        if not os.path.exists(jar_path):
            message = "Jar file " + jar_path + " does not exist"
            logger.error(message)
            raise HadoopJobException(message)

        # Check if the libraries exist. Returning silently at this point
        # would leave the instance without its attributes, so a missing
        # library is reported exactly like a missing jar.
        for lp in lib_paths:
            if not os.path.exists(lp):
                message = "Lib file " + lp + " does not exist"
                logger.error(message)
                raise HadoopJobException(message)

        self.jar_path = jar_path
        self.params = params
        self.lib_paths = lib_paths

示例#3

0

显示文件

    def _copy_base_conf(self):
        """Stage the base configuration in a fresh temporary directory.

        A new "spark-" directory is created under /tmp and stored in
        ``self.temp_conf_dir``. Files found in ``self.local_base_conf_dir``
        are copied into it; mandatory files not provided locally are then
        retrieved from ``self.conf_dir`` on the master.
        """

        self.temp_conf_dir = tempfile.mkdtemp(suffix="", prefix="spark-",
                                              dir="/tmp")

        base_conf_files = []
        if not os.path.exists(self.local_base_conf_dir):
            logger.warn(
                "Local conf dir does not exist. Using default configuration")
        else:
            for entry in os.listdir(self.local_base_conf_dir):
                base_conf_files.append(
                    os.path.join(self.local_base_conf_dir, entry))
            for path in base_conf_files:
                shutil.copy(path, self.temp_conf_dir)

        # No file is mandatory here, so the list of missing files starts
        # (and stays) empty.
        missing_conf_files = []
        for provided in map(os.path.basename, base_conf_files):
            if provided in missing_conf_files:
                missing_conf_files.remove(provided)

        logger.info("Copying missing conf files from master: " +
                    str(missing_conf_files))

        remote_missing_files = []
        for name in missing_conf_files:
            remote_missing_files.append(os.path.join(self.conf_dir, name))

        fetch = Get([self.master], remote_missing_files, self.temp_conf_dir)
        fetch.run()

示例#4

0

显示文件

文件： cluster.py 项目： mliroz/bigdata_dpy

    def _initialize_conf(self):
        """Merge locally-specified configuration files with default files
        from the distribution.

        Every file in ``self.local_base_conf_dir`` (when that directory
        exists) is copied to ``self.init_conf_dir``. The files listed in
        ``self.conf_mandatory_files`` that were not provided locally are then
        fetched from ``self.conf_dir`` on the master into the same directory.
        """

        if os.path.exists(self.local_base_conf_dir):
            base_conf_files = [os.path.join(self.local_base_conf_dir, f)
                               for f in os.listdir(self.local_base_conf_dir)]
            for f in base_conf_files:
                shutil.copy(f, self.init_conf_dir)
        else:
            logger.warn(
                "Local conf dir does not exist. Using default configuration")
            base_conf_files = []

        # Build a new list rather than removing entries from
        # self.conf_mandatory_files itself: that attribute must keep its
        # full content for later calls (and for other instances if it is
        # shared at class level).
        local_names = set(os.path.basename(f) for f in base_conf_files)
        missing_conf_files = [f for f in self.conf_mandatory_files
                              if f not in local_names]

        logger.info("Copying missing conf files from master: " + str(
            missing_conf_files))

        remote_missing_files = [os.path.join(self.conf_dir, f)
                                for f in missing_conf_files]

        action = Get([self.master], remote_missing_files, self.init_conf_dir)
        action.run()

示例#5

0

显示文件

    def __init__(self, hosts, topo_list=None):
        """Build the topology that maps the hosts of a cluster to racks.

        If ``topo_list`` is given and has as many entries as ``hosts`` it is
        stored unchanged. Otherwise the rack of each host is discovered from
        its attributes: the switch of the network adapter whose address
        equals the host address, prefixed with "/".

        Args:
          hosts (list of Host):
            The hosts to be assigned a topology.
          topo_list (list of str, optional):
            The racks to be assigned to each host. len(hosts) should be equal
            to len(topo_list).
        """

        if topo_list:
            if len(hosts) != len(topo_list):
                logger.warn("hosts and topology have not the same length.")
            else:
                # NOTE(review): here the topology is the object passed in,
                # while the discovery path below builds a dict keyed by
                # host - confirm consumers cope with both.
                self.topology = topo_list
                return

        logger.info("Discovering topology automatically")
        self.topology = {}
        for host in hosts:
            adapters = get_host_attributes(host)[u'network_adapters']
            # Only the first adapter bound to the host address is used.
            matching = next(
                (adapter for adapter in adapters
                 if u'network_address' in adapter
                 and adapter[u'network_address'] == host.address),
                None)
            if matching is not None:
                self.topology[host] = "/" + matching[u'switch']

示例#6

0

显示文件

文件： rally-g5k.py 项目： asimonet/rally-g5k

    def _run_or_abort(self,
                      cmd,
                      host,
                      error_message,
                      tear_down=True,
                      conn_params=None):
        """Attempt to run a command on the given host over SSH.

        If the command exits with a non-zero code, error_message and the
        process error output (when there is one) are logged as warnings and
        the command itself is logged.

        In addition, if tear_down is True, the tear_down() method is called
        and the process exits with return code 1.

        Args:
          cmd: The command handed to EX.SshProcess.
          host: The host on which the command is run.
          error_message (str): Message logged when the command fails.
          tear_down (bool, optional): Whether to tear down and exit on
            failure.
          conn_params (optional): Connection parameters forwarded to
            EX.SshProcess when provided.

        Raises:
          SystemExit: With code 1 when the command fails and tear_down is
            True.
        """

        if conn_params:
            p = EX.SshProcess(cmd, host, conn_params)
        else:
            p = EX.SshProcess(cmd, host)
        p.run()

        if p.exit_code == 0:
            return

        logger.warn(error_message)

        if p.stderr is not None:
            logger.warn(p.stderr)

        # NOTE(review): assumes p.cmd is a sequence of strings; if it is a
        # plain string this puts a space between every character - confirm.
        logger.info(' '.join(p.cmd))

        if tear_down:
            self.tear_down()
            # The exit() builtin is injected by the site module and is not
            # guaranteed to exist; raising SystemExit is what it does anyway.
            raise SystemExit(1)

示例#7

0

显示文件

文件： objects.py 项目： djamelinfo/hadoop_g5k

    def __init__(self, hosts, topo_list=None):
        """Build the topology that maps the hosts of a cluster to racks.

        If ``topo_list`` is given and has as many entries as ``hosts`` it is
        stored unchanged. Otherwise the rack of each host is discovered from
        its attributes: the switch of the network adapter whose address
        equals the host address, prefixed with "/".

        Args:
          hosts (list of Host):
            The hosts to be assigned a topology.
          topo_list (list of str, optional):
            The racks to be assigned to each host. len(hosts) should be equal
            to len(topo_list).
        """

        if topo_list:
            if len(hosts) != len(topo_list):
                logger.warn("hosts and topology have not the same length.")
            else:
                # NOTE(review): the supplied object is stored as-is, whereas
                # automatic discovery produces a dict keyed by host -
                # confirm that readers of self.topology handle both.
                self.topology = topo_list
                return

        logger.info("Discovering topology automatically")
        self.topology = {}
        for host in hosts:
            adapters = get_host_attributes(host)[u'network_adapters']
            # Only the first adapter bound to the host address is used.
            matching = next(
                (adapter for adapter in adapters
                 if u'network_address' in adapter
                 and adapter[u'network_address'] == host.address),
                None)
            if matching is not None:
                self.topology[host] = "/" + matching[u'switch']

示例#8

0

显示文件

    def _initialize_conf(self):
        """Merge locally-specified configuration files with default files
        from the distribution.

        Every file in ``self.local_base_conf_dir`` (when that directory
        exists) is copied to ``self.init_conf_dir``. The files listed in
        ``self.conf_mandatory_files`` that were not provided locally are then
        fetched from ``self.conf_dir`` on the master into the same directory.
        """

        if os.path.exists(self.local_base_conf_dir):
            base_conf_files = [
                os.path.join(self.local_base_conf_dir, f)
                for f in os.listdir(self.local_base_conf_dir)
            ]
            for f in base_conf_files:
                shutil.copy(f, self.init_conf_dir)
        else:
            logger.warn(
                "Local conf dir does not exist. Using default configuration")
            base_conf_files = []

        # Compute the missing files into a new list: removing entries from
        # self.conf_mandatory_files directly would permanently shrink that
        # attribute as a side effect of this call.
        local_names = set(os.path.basename(f) for f in base_conf_files)
        missing_conf_files = [
            f for f in self.conf_mandatory_files if f not in local_names
        ]

        logger.info("Copying missing conf files from master: " +
                    str(missing_conf_files))

        remote_missing_files = [
            os.path.join(self.conf_dir, f) for f in missing_conf_files
        ]

        action = Get([self.master], remote_missing_files, self.init_conf_dir)
        action.run()

示例#9

0

显示文件

    def __force_clean(self):
        """Stop previous Spark processes (if any) and remove all remote files
        created by it.

        ``jps`` is run on every host; each process whose name is "Master" or
        "Worker" is killed with ``kill -9``. The logs are cleaned afterwards
        through ``self.clean_logs()``.
        """

        spark_processes = ["Master", "Worker"]

        force_kill = False
        for h in self.hosts:
            proc = SshProcess("jps", h)
            proc.run()

            # Each useful line is read as "<pid> <process name>". Blank
            # lines or lines with a single token are skipped instead of
            # raising IndexError.
            ids_to_kill = []
            for line in proc.stdout.splitlines():
                fields = line.split()
                if len(fields) >= 2 and fields[1] in spark_processes:
                    ids_to_kill.append(fields[0])

            if ids_to_kill:
                force_kill = True

                logger.warn("Killing running Spark processes in host %s" %
                            style.host(h.address.split('.')[0]))

                proc = SshProcess("kill -9 " + " ".join(ids_to_kill), h)
                proc.run()

        if force_kill:
            logger.info(
                "Processes from previous hadoop deployments had to be killed")

        self.clean_logs()

示例#10

0

显示文件

文件： cluster.py 项目： lmolina/hadoop_g5k

    def _copy_base_conf(self):
        """Stage the base configuration in a fresh temporary directory.

        A new "hadoop-" directory is created under /tmp and stored in
        ``self.temp_conf_dir``. Files found in ``self.local_base_conf_dir``
        are copied into it; the core, HDFS and MapReduce configuration files
        not provided locally are then retrieved from ``self.conf_dir`` on the
        master.
        """

        self.temp_conf_dir = tempfile.mkdtemp(suffix="", prefix="hadoop-",
                                              dir="/tmp")

        base_conf_files = []
        if not os.path.exists(self.local_base_conf_dir):
            logger.warn(
                "Local conf dir does not exist. Using default configuration")
        else:
            for entry in os.listdir(self.local_base_conf_dir):
                base_conf_files.append(
                    os.path.join(self.local_base_conf_dir, entry))
            for path in base_conf_files:
                shutil.copy(path, self.temp_conf_dir)

        # Start from every mandatory file and drop those supplied locally.
        missing_conf_files = [CORE_CONF_FILE, HDFS_CONF_FILE, MR_CONF_FILE]
        for provided in map(os.path.basename, base_conf_files):
            if provided in missing_conf_files:
                missing_conf_files.remove(provided)

        logger.info("Copying missing conf files from master: " + str(
            missing_conf_files))

        remote_missing_files = []
        for name in missing_conf_files:
            remote_missing_files.append(os.path.join(self.conf_dir, name))

        fetch = Get([self.master], remote_missing_files, self.temp_conf_dir)
        fetch.run()

示例#11

0

显示文件

文件： cassandra.py 项目： mliroz/bigdata_dpy

    def clean(self):
        """Delete the files produced by Cassandra.

        A cluster that is still running is stopped first (a warning is
        logged); the logs are then removed through ``clean_logs()``.
        """

        # Cleaning only happens on a stopped cluster.
        if self.running:
            logger.warn("The cluster needs to be stopped before cleaning.")
            self.stop()

        self.clean_logs()

示例#12

0

显示文件

文件： spark.py 项目： djamelinfo/hadoop_g5k

    def execute_job(self, job, node=None, verbose=True):
        """Run a Spark job through spark-submit on a node of the cluster.

        The cluster is started first if it is not running. The files of the
        job are copied to /tmp on the node, the job is submitted, and its
        outputs and success flag are recorded on the job object.

        Args:
          job (SparkJob):
            The job object.
          node (Host, optional):
            The host where the command should be executed. If not provided,
            self.master is chosen.
          verbose (bool, optional):
            If True stdout and stderr of remote process is displayed.

        Returns (tuple of str):
          A tuple with the standard and error outputs of the process executing
          the job.
        """

        if not self.running:
            logger.warn("The cluster was stopped. Starting it automatically")
            self.start()

        target = self.master if node is None else node
        exec_dir = "/tmp"

        # Ship the files the job depends on to the execution directory.
        files_to_copy = job.get_files_to_copy()
        upload = Put([target], files_to_copy, exec_dir)
        upload.run()

        # Assemble the full submission command once; it is both logged and
        # executed.
        command = job.get_command(exec_dir)
        submit_cmd = self.bin_dir + "/spark-submit " + command

        logger.info("Executing spark job. Command = {" + submit_cmd +
                    "} in " + str(target))

        proc = SshProcess(submit_cmd, target)

        if verbose:
            # Remote stdout is echoed as-is, remote stderr in red.
            proc.stdout_handlers.append(sys.stdout)
            proc.stderr_handlers.append(
                ColorDecorator(sys.stderr, '\033[01;31m'))

        proc.start()
        proc.wait()

        # Record the outcome on the job itself.
        job.stdout = proc.stdout
        job.stderr = proc.stderr
        job.success = proc.exit_code == 0

        return proc.stdout, proc.stderr

示例#13

0

显示文件

    def execute_job(self, job, node=None, verbose=True):
        """Run a Spark job through spark-submit on a node of the cluster.

        The cluster is started first if it is not running. The files of the
        job are copied to /tmp on the node, the job is submitted, and its
        outputs and success flag are recorded on the job object.

        Args:
          job (SparkJob):
            The job object.
          node (Host, optional):
            The host where the command should be executed. If not provided,
            self.master is chosen.
          verbose (bool, optional):
            If True stdout and stderr of remote process is displayed.

        Returns (tuple of str):
          A tuple with the standard and error outputs of the process executing
          the job.
        """

        if not self.running:
            logger.warn("The cluster was stopped. Starting it automatically")
            self.start()

        target = self.master if node is None else node
        exec_dir = "/tmp"

        # Ship the files the job depends on to the execution directory.
        files_to_copy = job.get_files_to_copy()
        upload = Put([target], files_to_copy, exec_dir)
        upload.run()

        # Assemble the full submission command once; it is both logged and
        # executed.
        command = job.get_command(exec_dir)
        submit_cmd = self.bin_dir + "/spark-submit " + command

        logger.info("Executing spark job. Command = {" + submit_cmd +
                    "} in " + str(target))

        proc = SshProcess(submit_cmd, target)

        if verbose:
            # Remote stdout is echoed as-is, remote stderr in red.
            proc.stdout_handlers.append(sys.stdout)
            proc.stderr_handlers.append(
                ColorDecorator(sys.stderr, '\033[01;31m'))

        proc.start()
        proc.wait()

        # Record the outcome on the job itself.
        job.stdout = proc.stdout
        job.stderr = proc.stderr
        job.success = proc.exit_code == 0

        return proc.stdout, proc.stderr

示例#14

0

显示文件

文件： spark.py 项目： djamelinfo/hadoop_g5k

    def clean(self):
        """Delete the files produced by Spark.

        A cluster that is still running is stopped first (a warning is
        logged). Configuration and logs are then removed and the cluster is
        flagged as not initialized.
        """

        # Cleaning only happens on a stopped cluster.
        if self.running:
            logger.warn("The cluster needs to be stopped before cleaning.")
            self.stop()

        # Configuration first, then logs.
        self.clean_conf()
        self.clean_logs()

        # A new initialization is needed before the next use.
        self.initialized = False

示例#15

0

显示文件

    def clean(self):
        """Delete the files produced by Spark.

        A cluster that is still running is stopped first (a warning is
        logged). Configuration and logs are then removed and the cluster is
        flagged as not initialized.
        """

        # Cleaning only happens on a stopped cluster.
        if self.running:
            logger.warn("The cluster needs to be stopped before cleaning.")
            self.stop()

        # Configuration first, then logs.
        self.clean_conf()
        self.clean_logs()

        # A new initialization is needed before the next use.
        self.initialized = False

示例#16

0

显示文件

文件： hive.py 项目： djamelinfo/hadoop_g5k

    def _create_warehouse(self):
        """Create the HDFS directories used by Hive and make them
        group-writable.

        Hadoop is started (and waited for) first when it is not running.
        """

        hadoop = self.hc

        if not hadoop.running:
            logger.warn("Hadoop must be started first")
            hadoop.start_and_wait()

        logger.info("Creating warehouse dirs in HDFS")

        # Directories are created before their permissions are changed.
        warehouse_commands = (
            "fs -mkdir -p /tmp",
            "fs -mkdir -p /user/hive/warehouse",
            "fs -chmod g+w /tmp",
            "fs -chmod g+w /user/hive/warehouse",
        )
        for fs_command in warehouse_commands:
            hadoop.execute(fs_command, verbose=False)

示例#17

0

显示文件

文件： hive.py 项目： rwfazul/hadoop_g5k

    def _create_warehouse(self):
        """Create the HDFS directories used by Hive and make them
        group-writable.

        Hadoop is started (and waited for) first when it is not running.
        """

        hadoop = self.hc

        if not hadoop.running:
            logger.warn("Hadoop must be started first")
            hadoop.start_and_wait()

        logger.info("Creating warehouse dirs in HDFS")

        # Directories are created before their permissions are changed.
        warehouse_commands = (
            "fs -mkdir -p /tmp",
            "fs -mkdir -p /user/hive/warehouse",
            "fs -chmod g+w /tmp",
            "fs -chmod g+w /user/hive/warehouse",
        )
        for fs_command in warehouse_commands:
            hadoop.execute(fs_command, verbose=False)

示例#18

0

显示文件

    def format_dfs(self):
        """Run the namenode format command on the master and log the
        outcome."""

        logger.info("Formatting HDFS")

        format_cmd = self.bin_dir + "/hadoop namenode -format"
        proc = SshProcess(format_cmd, self.master)
        proc.run()

        # A failure is only reported; nothing is raised.
        if not proc.finished_ok:
            logger.warn("Error while formatting HDFS")
            return

        logger.info("HDFS formatted successfully")

示例#19

0

显示文件

文件： cluster.py 项目： lmolina/hadoop_g5k

    def clean(self):
        """Delete the files produced by Hadoop: configuration, logs and
        data.

        A cluster that is still running is stopped first (a warning is
        logged). Once everything is removed the cluster is flagged as not
        initialized.
        """

        # Cleaning only happens on a stopped cluster.
        if self.running:
            logger.warn("The cluster needs to be stopped before cleaning.")
            self.stop()

        # Configuration, then logs, then data.
        self.clean_conf()
        self.clean_logs()
        self.clean_data()

        # A new initialization is needed before the next use.
        self.initialized = False

示例#20

0

显示文件

文件： cluster.py 项目： lmolina/hadoop_g5k

    def format_dfs(self):
        """Run the namenode format command on the master and log the
        outcome."""

        logger.info("Formatting HDFS")

        format_cmd = self.bin_dir + "/hadoop namenode -format"
        proc = SshProcess(format_cmd, self.master)
        proc.run()

        # A failure is only reported; nothing is raised.
        if not proc.finished_ok:
            logger.warn("Error while formatting HDFS")
            return

        logger.info("HDFS formatted successfully")

示例#21

0

显示文件

    def clean(self):
        """Delete the files produced by Hadoop: configuration, logs and
        data.

        A cluster that is still running is stopped first (a warning is
        logged). Once everything is removed the cluster is flagged as not
        initialized.
        """

        # Cleaning only happens on a stopped cluster.
        if self.running:
            logger.warn("The cluster needs to be stopped before cleaning.")
            self.stop()

        # Configuration, then logs, then data.
        self.clean_conf()
        self.clean_logs()
        self.clean_data()

        # A new initialization is needed before the next use.
        self.initialized = False

示例#22

0

显示文件

    def stop_map_reduce(self):
        """Run the MapReduce stop script on the master.

        ``self.running_map_reduce`` is cleared only when the script finishes
        correctly; otherwise a warning is logged and the flag is untouched.
        """

        self._check_initialization()

        logger.info("Stopping MapReduce")

        stop_script = self.sbin_dir + "/stop-mapred.sh"
        proc = SshProcess(stop_script, self.master)
        proc.run()

        if proc.finished_ok:
            self.running_map_reduce = False
        else:
            logger.warn("Error while stopping MapReduce")

示例#23

0

显示文件

    def stop_dfs(self):
        """Run the HDFS stop script on the master.

        ``self.running_dfs`` is cleared only when the script finishes
        correctly; otherwise a warning is logged and the flag is untouched.
        """

        self._check_initialization()

        logger.info("Stopping HDFS")

        stop_script = self.sbin_dir + "/stop-dfs.sh"
        proc = SshProcess(stop_script, self.master)
        proc.run()

        if proc.finished_ok:
            self.running_dfs = False
        else:
            logger.warn("Error while stopping HDFS")

示例#24

0

显示文件

文件： cluster_v2.py 项目： sarlam/hadoop_g5k

    def stop_yarn(self):
        """Run the YARN stop script on the master.

        ``self.running_yarn`` is cleared only when the script finishes
        correctly; otherwise a warning is logged and the flag is untouched.
        """

        self._check_initialization()

        logger.info("Stopping YARN")

        stop_script = self.sbin_dir + "/stop-yarn.sh"
        proc = SshProcess(stop_script, self.master)
        proc.run()

        if proc.finished_ok:
            self.running_yarn = False
        else:
            logger.warn("Error while stopping YARN")

示例#25

0

显示文件

文件： cluster_v2.py 项目： mliroz/bigdata_dpy

    def stop_yarn(self):
        """Run the YARN stop script on the master.

        ``self.running_yarn`` is cleared only when the script finishes
        correctly; otherwise a warning is logged and the flag is untouched.
        """

        self._check_initialization()

        logger.info("Stopping YARN")

        stop_script = self.sbin_dir + "/stop-yarn.sh"
        proc = SshProcess(stop_script, self.master)
        proc.run()

        if proc.finished_ok:
            self.running_yarn = False
        else:
            logger.warn("Error while stopping YARN")

示例#26

0

显示文件

文件： mongodb.py 项目： mliroz/bigdata_dpy

    def _copy_conf(self, conf_dir, hosts=None):
        """Send every entry of a local directory to the remote conf dir.

        Args:
          conf_dir (str):
            Local directory whose entries are copied to ``self.conf_dir``.
          hosts (optional):
            Destination hosts. When empty or not given, ``self.hosts`` is
            used.
        """

        target_hosts = hosts if hosts else self.hosts

        conf_files = []
        for entry in os.listdir(conf_dir):
            conf_files.append(os.path.join(conf_dir, entry))

        transfer = TaktukPut(target_hosts, conf_files, self.conf_dir)
        transfer.run()

        if transfer.finished_ok:
            return

        logger.warn("Error while copying configuration")
        # Kill a transfer that failed without having ended.
        if not transfer.ended:
            transfer.kill()

示例#27

0

显示文件

文件： cluster.py 项目： lmolina/hadoop_g5k

    def stop_map_reduce(self):
        """Run the MapReduce stop script on the master.

        ``self.running_map_reduce`` is cleared only when the script finishes
        correctly; otherwise a warning is logged and the flag is untouched.
        """

        self._check_initialization()

        logger.info("Stopping MapReduce")

        stop_script = self.sbin_dir + "/stop-mapred.sh"
        proc = SshProcess(stop_script, self.master)
        proc.run()

        if proc.finished_ok:
            self.running_map_reduce = False
        else:
            logger.warn("Error while stopping MapReduce")

示例#28

0

显示文件

文件： cluster.py 项目： lmolina/hadoop_g5k

    def stop_dfs(self):
        """Run the HDFS stop script on the master.

        ``self.running_dfs`` is cleared only when the script finishes
        correctly; otherwise a warning is logged and the flag is untouched.
        """

        self._check_initialization()

        logger.info("Stopping HDFS")

        stop_script = self.sbin_dir + "/stop-dfs.sh"
        proc = SshProcess(stop_script, self.master)
        proc.run()

        if proc.finished_ok:
            self.running_dfs = False
        else:
            logger.warn("Error while stopping HDFS")

示例#29

0

显示文件

    def execute(self,
                command,
                node=None,
                should_be_running=True,
                verbose=True):
        """Run a Hadoop command on a node of the cluster over SSH.

        Args:
          command (str):
            The command to be executed.
          node (Host, optional):
            The host where the command should be executed. If not provided,
            self.master is chosen.
          should_be_running (bool, optional):
            True if the cluster needs to be running in order to execute the
            command. If so, and it is not running, it is automatically started.
          verbose: (bool, optional):
            If True stdout and stderr of remote process is displayed.

        Returns (tuple of str):
          A tuple with the standard and error outputs of the process executing
          the command.
        """

        self._check_initialization()

        if should_be_running and not self.running:
            logger.warn("The cluster was stopped. Starting it automatically")
            self.start()

        target = node or self.master

        if verbose:
            logger.info("Executing {" + self.bin_dir + "/hadoop " + command +
                        "} in " + str(target))

        hadoop_cmd = self.bin_dir + "/hadoop " + command
        proc = SshProcess(hadoop_cmd, target)

        if verbose:
            # Remote stdout is echoed as-is, remote stderr in red.
            proc.stdout_handlers.append(sys.stdout)
            proc.stderr_handlers.append(
                ColorDecorator(sys.stderr, '\033[01;31m'))

        proc.start()
        proc.wait()

        return proc.stdout, proc.stderr

示例#30

0

显示文件

    def stop_spark(self):
        """Stop Spark and clear the running flag.

        In standalone mode the slave and master stop scripts are run on the
        master; if they do not finish correctly a warning is logged and the
        running flag is left untouched.
        """

        logger.info("Stopping Spark")

        if self.mode == STANDALONE_MODE:
            # Slaves are stopped before the master, in a single remote shell.
            stop_cmd = (self.sbin_dir + "/stop-slaves.sh;" +
                        self.sbin_dir + "/stop-master.sh;")
            proc = SshProcess(stop_cmd, self.master)
            proc.run()
            if not proc.finished_ok:
                logger.warn("Error while stopping Spark")
                return

        self.running = False

示例#31

0

显示文件

文件： hive.py 项目： djamelinfo/hadoop_g5k

    def start(self):
        """Flag Hive as running, starting Hadoop beforehand if needed.

        When Hive is already flagged as running only a warning is logged.
        """

        logger.info("Starting Hive")

        if self.running:
            logger.warn("Hive was already started")
        else:
            hadoop = self.hc
            if not hadoop.running:
                logger.warn("Hadoop must be started first")
                hadoop.start_and_wait()

            # Nothing is launched here; only the state flag changes.
            self.running = True

示例#32

0

显示文件

文件： spark.py 项目： djamelinfo/hadoop_g5k

    def stop_spark(self):
        """Stop Spark and clear the running flag.

        In standalone mode the slave and master stop scripts are run on the
        master; if they do not finish correctly a warning is logged and the
        running flag is left untouched.
        """

        logger.info("Stopping Spark")

        if self.mode == STANDALONE_MODE:
            # Slaves are stopped before the master, in a single remote shell.
            stop_cmd = (self.sbin_dir + "/stop-slaves.sh;" +
                        self.sbin_dir + "/stop-master.sh;")
            proc = SshProcess(stop_cmd, self.master)
            proc.run()
            if not proc.finished_ok:
                logger.warn("Error while stopping Spark")
                return

        self.running = False

示例#33

0

显示文件

文件： hive.py 项目： rwfazul/hadoop_g5k

    def start(self):
        """Flag Hive as running, starting Hadoop beforehand if needed.

        When Hive is already flagged as running only a warning is logged.
        """

        logger.info("Starting Hive")

        if self.running:
            logger.warn("Hive was already started")
        else:
            hadoop = self.hc
            if not hadoop.running:
                logger.warn("Hadoop must be started first")
                hadoop.start_and_wait()

            # Nothing is launched here; only the state flag changes.
            self.running = True

示例#34

0

显示文件

文件： hive.py 项目： rwfazul/hadoop_g5k

    def clean_logs(self):
        """Delete the content of the logs directory on every host.

        A running cluster is stopped for the duration of the cleaning and
        started again afterwards.
        """

        logger.info("Cleaning logs")

        # Remember the state so that it can be restored once the logs are
        # gone.
        restart = bool(self.running)
        if restart:
            logger.warn("The cluster needs to be stopped before cleaning.")
            self.stop()

        removal = Remote("rm -rf " + self.logs_dir + "/* ", self.hosts)
        removal.run()

        if restart:
            self.start()

示例#35

0

显示文件

文件： cluster.py 项目： lmolina/hadoop_g5k

    def start_dfs_and_wait(self):
        """Start HDFS, then block until the safe-mode wait command returns.

        ``self.running_dfs`` is set only when that command finishes
        correctly; otherwise a warning is logged.
        """

        self._check_initialization()

        self.start_dfs()

        logger.info("Waiting for safe mode to be off")
        wait_cmd = self.bin_dir + "/hadoop dfsadmin -safemode wait"
        proc = SshProcess(wait_cmd, self.master)
        proc.run()

        if proc.finished_ok:
            self.running_dfs = True
        else:
            logger.warn("Error while starting HDFS")

示例#36

0

显示文件

文件： cluster.py 项目： lmolina/hadoop_g5k

    def clean_logs(self):
        """Delete the content of the logs directory on every host.

        A running cluster is stopped for the duration of the cleaning and
        started again afterwards.
        """

        logger.info("Cleaning logs")

        # Remember the state so that it can be restored once the logs are
        # gone.
        restart = bool(self.running)
        if restart:
            logger.warn("The cluster needs to be stopped before cleaning.")
            self.stop()

        removal = Remote("rm -rf " + self.logs_dir + "/*", self.hosts)
        removal.run()

        if restart:
            self.start()

示例#37

0

显示文件

    def start_dfs_and_wait(self):
        """Start HDFS, then block until the safe-mode wait command returns.

        ``self.running_dfs`` is set only when that command finishes
        correctly; otherwise a warning is logged.
        """

        self._check_initialization()

        self.start_dfs()

        logger.info("Waiting for safe mode to be off")
        wait_cmd = self.bin_dir + "/hadoop dfsadmin -safemode wait"
        proc = SshProcess(wait_cmd, self.master)
        proc.run()

        if proc.finished_ok:
            self.running_dfs = True
        else:
            logger.warn("Error while starting HDFS")

示例#38

0

显示文件

    def clean_history(self):
        """Delete the history directory kept under the logs dir of the
        master.

        A running cluster is stopped for the duration of the cleaning and
        started again afterwards.
        """

        logger.info("Cleaning history")

        # Remember the state so that it can be restored afterwards.
        restart = bool(self.running)
        if restart:
            logger.warn("The cluster needs to be stopped before cleaning.")
            self.stop()

        removal = Remote("rm -rf " + self.logs_dir + "/history",
                         [self.master])
        removal.run()

        if restart:
            self.start()

示例#39

0

显示文件

文件： cluster.py 项目： lmolina/hadoop_g5k

    def execute(self, command, node=None, should_be_running=True,
                verbose=True):
        """Run a Hadoop command on a node of the cluster over SSH.

        Args:
          command (str):
            The command to be executed.
          node (Host, optional):
            The host where the command should be executed. If not provided,
            self.master is chosen.
          should_be_running (bool, optional):
            True if the cluster needs to be running in order to execute the
            command. If so, and it is not running, it is automatically started.
          verbose: (bool, optional):
            If True stdout and stderr of remote process is displayed.

        Returns (tuple of str):
          A tuple with the standard and error outputs of the process executing
          the command.
        """

        self._check_initialization()

        if should_be_running and not self.running:
            logger.warn("The cluster was stopped. Starting it automatically")
            self.start()

        target = node or self.master

        if verbose:
            logger.info("Executing {" + self.bin_dir + "/hadoop " +
                        command + "} in " + str(target))

        hadoop_cmd = self.bin_dir + "/hadoop " + command
        proc = SshProcess(hadoop_cmd, target)

        if verbose:
            # Remote stdout is echoed as-is, remote stderr in red.
            proc.stdout_handlers.append(sys.stdout)
            proc.stderr_handlers.append(
                ColorDecorator(sys.stderr, '\033[01;31m'))

        proc.start()
        proc.wait()

        outputs = (proc.stdout, proc.stderr)
        return outputs

示例#40

0

显示文件

文件： cluster.py 项目： lmolina/hadoop_g5k

    def _configure_servers(self, hosts=None):
        """Configure servers and host-dependant parameters.

        The core and MapReduce configuration files in ``self.temp_conf_dir``
        are updated with the master address and ports, the temporary and
        topology-script locations, the number of map/reduce slots and, when
        it can be computed, the per-slot JVM heap size.

        Args:
          hosts (list of Host, optional):
            The list of hosts to take into account in the configuration. If
            not specified, all the hosts of the Hadoop cluster are used. The
            first host of this list is always used as the reference.
        """

        if not hosts:
            hosts = self.hosts

        host_attrs = get_host_attributes(hosts[0])
        num_cores = host_attrs[u'architecture'][u'smt_size']

        # One core is left out of the slots, but never go below one slot:
        # otherwise a single-core reference host divides by zero below.
        num_slots = max(num_cores - 1, 1)

        # Floor division keeps the values integral on Python 3 as well, so
        # the JVM option is rendered as e.g. "-Xmx2048m", not "-Xmx2048.0m".
        # 2 GB (2 * 1024 MB) are subtracted from the RAM of the host before
        # it is shared between the slots.
        total_memory_mb = (int(host_attrs[u'main_memory'][u'ram_size']) //
                           (1024 * 1024)) - 2 * 1024
        mem_per_slot_mb = total_memory_mb // num_slots

        core_conf = os.path.join(self.temp_conf_dir, CORE_CONF_FILE)
        mr_conf = os.path.join(self.temp_conf_dir, MR_CONF_FILE)

        replace_in_xml_file(core_conf,
                            "fs.default.name",
                            "hdfs://" + self.master.address + ":" +
                            str(self.hdfs_port) + "/",
                            True)
        replace_in_xml_file(core_conf,
                            "hadoop.tmp.dir",
                            self.hadoop_temp_dir, True)
        replace_in_xml_file(core_conf,
                            "topology.script.file.name",
                            self.conf_dir + "/topo.sh", True)

        replace_in_xml_file(mr_conf,
                            "mapred.job.tracker",
                            self.master.address + ":" +
                            str(self.mapred_port), True)
        replace_in_xml_file(mr_conf,
                            "mapred.tasktracker.map.tasks.maximum",
                            str(num_slots), True)
        replace_in_xml_file(mr_conf,
                            "mapred.tasktracker.reduce.tasks.maximum",
                            str(num_slots), True)
        if mem_per_slot_mb <= 0:
            # Not enough memory on the reference host: the heap option is
            # left as it is in the configuration file.
            logger.warn("Memory is negative, no setting")
        else:
            replace_in_xml_file(mr_conf,
                                "mapred.child.java.opts",
                                "-Xmx" + str(mem_per_slot_mb) + "m", True)

示例#41

0

显示文件

文件： util.py 项目： sarlam/hadoop_g5k

def uncompress(file_name, host):
    """Decompress a file on a remote host and rename the result.

    The ending of ``file_name`` selects the command that is run on the host.
    The endings are tested in this order: ``tar.gz`` (``tar xf``), ``gz``
    (``gzip -d``), ``zip`` (``unzip``) and ``bz2`` (``bzip2 -d``). Afterwards
    the path without its extension is moved to ``data-<base name>`` in the
    same directory.

    Args:
      file_name (str):
        The path of the compressed file.
      host:
        The host where the commands are executed (handed to ``Remote``).

    Returns:
      str: The new path of the uncompressed data, or ``file_name`` itself
      when the ending is not one of the supported ones.
    """

    # (ending, command prefix, number of trailing characters to drop).
    # Order matters: "tar.gz" has to be tried before the plain "gz".
    handlers = (
        ("tar.gz", "tar xf ", 7),
        ("gz", "gzip -d ", 3),
        ("zip", "unzip ", 4),
        ("bz2", "bzip2 -d ", 4),
    )

    for ending, command, cut in handlers:
        if not file_name.endswith(ending):
            continue

        Remote(command + file_name, [host]).run()

        stripped = file_name[:-cut]
        base_name = os.path.basename(stripped)
        new_name = os.path.dirname(stripped) + "/data-" + base_name

        Remote("mv " + stripped + " " + new_name, [host]).run()
        return new_name

    logger.warn("Unknown extension")
    return file_name

示例#42

0

显示文件

文件： dataset.py 项目： mliroz/bigdata_dpy

    def clean(self, hc):
        """Delete this dataset from the dfs of the given cluster.

        Every entry of ``self.deployments`` whose cluster equals ``hc`` is
        removed with an ``fs -rmr`` command executed through the cluster. A
        warning is logged when no entry matches.

        Args:
          hc (HadoopCluster):
            The Hadoop cluster where the dataset has been deployed.
        """

        removals = 0
        for (cluster, size) in self.deployments:
            if not (hc == cluster):
                continue

            dfs_path = self.deployments[hc, size]
            hc.execute("fs -rmr " + dfs_path,
                       should_be_running=True, verbose=False)
            removals += 1

        if removals == 0:
            logger.warn("The dataset was not loaded in the given cluster")

示例#43

0

显示文件

文件： cluster_v2.py 项目： mliroz/bigdata_dpy

    def start_yarn(self):
        """Launch the YARN ResourceManager and NodeManagers.

        Runs ``start-yarn.sh`` from the sbin directory on the master through
        SSH. When the script finishes correctly ``running_yarn`` is set, and
        ``running`` is set as well if dfs is already flagged as running. On
        failure only a warning is logged.
        """

        logger.info("Starting YARN")

        self._check_initialization()

        launcher = SshProcess(self.sbin_dir + "/start-yarn.sh", self.master)
        launcher.run()

        if not launcher.finished_ok:
            logger.warn("Error while starting YARN")
            return

        #TODO: get success or not from super.
        self.running_yarn = True
        if self.running_dfs:
            self.running = True

示例#44

0

显示文件

文件： dataset.py 项目： sarlam/hadoop_g5k

    def clean(self, hc):
        """Remove the dataset from the dfs of a cluster.

        Walks ``self.deployments`` and, for each entry deployed on ``hc``,
        executes ``fs -rmr`` on the stored path through the cluster. Logs a
        warning if the dataset was not deployed there.

        Args:
          hc (HadoopCluster):
            The Hadoop cluster where the dataset has been deployed.
        """

        found = False
        for (deployed_cluster, deployed_size) in self.deployments:
            if not (hc == deployed_cluster):
                continue

            found = True
            target = self.deployments[hc, deployed_size]
            hc.execute("fs -rmr " + target,
                       should_be_running=True, verbose=False)

        if found:
            return

        logger.warn("The dataset was not loaded in the given cluster")

示例#45

0

显示文件

    def start_yarn(self):
        """Start YARN (ResourceManager and NodeManagers) from the master.

        The ``start-yarn.sh`` script is executed on the master over SSH. A
        warning is logged if it does not finish correctly; otherwise
        ``running_yarn`` is set and, provided dfs is running too, ``running``.
        """

        logger.info("Starting YARN")

        self._check_initialization()

        start_script = self.sbin_dir + "/start-yarn.sh"
        yarn_proc = SshProcess(start_script, self.master)
        yarn_proc.run()

        if yarn_proc.finished_ok:
            #TODO: get success or not from super.
            self.running_yarn = True
            if self.running_dfs:
                self.running = True
        else:
            logger.warn("Error while starting YARN")

示例#46

0

显示文件

文件： util.py 项目： sarlam/hadoop_g5k

def uncompress(file_name, host):
    """Uncompress a remote file and move the output to a ``data-`` name.

    Supported endings, tried in this order, and the command run for each:
    ``tar.gz`` -> ``tar xf``, ``gz`` -> ``gzip -d``, ``zip`` -> ``unzip``,
    ``bz2`` -> ``bzip2 -d``. Once decompressed, the path with the extension
    stripped is renamed to ``<dir>/data-<base name>``.

    Args:
      file_name (str):
        The path of the compressed file.
      host:
        The host on which the ``Remote`` commands are run.

    Returns:
      str: The renamed path, or the unmodified ``file_name`` if its ending
      is not supported.
    """

    # Each rule: (ending, decompression command, characters to strip).
    # "tar.gz" precedes "gz" because both match a ".tar.gz" file.
    rules = [
        ("tar.gz", "tar xf ", 7),
        ("gz", "gzip -d ", 3),
        ("zip", "unzip ", 4),
        ("bz2", "bzip2 -d ", 4),
    ]

    matched = None
    for rule in rules:
        if file_name.endswith(rule[0]):
            matched = rule
            break

    if matched is None:
        logger.warn("Unknown extension")
        return file_name

    _, tool, n_chars = matched

    decompression = Remote(tool + file_name, [host])
    decompression.run()

    plain_path = file_name[:-n_chars]
    base_name = os.path.basename(plain_path)
    dir_name = os.path.dirname(plain_path)
    new_name = dir_name + "/data-" + base_name

    rename = Remote("mv " + plain_path + " " + new_name, [host])
    rename.run()

    return new_name

示例#47

0

显示文件

文件： cluster.py 项目： lmolina/hadoop_g5k

    def clean_history(self):
        """Delete the ``history`` directory under the logs dir on the master.

        A running cluster is stopped first (with a warning) and started again
        once the directory has been removed.
        """

        logger.info("Cleaning history")

        needs_restart = bool(self.running)
        if needs_restart:
            logger.warn("The cluster needs to be stopped before cleaning.")
            self.stop()

        remove_cmd = "rm -rf " + self.logs_dir + "/history"
        Remote(remove_cmd, [self.master]).run()

        if needs_restart:
            self.start()

示例#48

0

显示文件

文件： cluster.py 项目： lmolina/hadoop_g5k

    def start_map_reduce(self):
        """Start the JobTracker and TaskTrackers.

        If MapReduce is already flagged as running, a warning is logged and
        nothing else is done. Otherwise ``start-mapred.sh`` is executed on the
        master over SSH; ``running_map_reduce`` is set only when the script
        finishes correctly, and a warning is logged when it does not.
        """

        self._check_initialization()

        logger.info("Starting MapReduce")

        if self.running_map_reduce:
            # Not an error: the service is simply up already.
            logger.warn("MapReduce was already started")
            return

        proc = SshProcess(self.sbin_dir + "/start-mapred.sh", self.master)
        proc.run()

        if not proc.finished_ok:
            # The start script failed, so leave the running flag untouched.
            logger.warn("Error while starting MapReduce")
        else:
            self.running_map_reduce = True

示例#49

0

显示文件

文件： cluster_v2.py 项目： mliroz/bigdata_dpy

    def clean_history(self):
        """Delete the current user's job history directory from dfs.

        The removal is an ``fs -rm -R`` command run through ``execute``. If
        the cluster is not running it is started first (with a warning) and
        stopped again afterwards.
        """

        logger.info("Cleaning history")

        started_here = not self.running
        if started_here:
            logger.warn("The cluster needs to be running before cleaning.")
            self.start()

        history_root = "/tmp/hadoop-yarn/staging/history/done_intermediate/"
        hist_dfs_dir = history_root + getpass.getuser()
        self.execute("fs -rm -R " + hist_dfs_dir, verbose=False)

        if started_here:
            self.stop()

示例#50

0

显示文件

文件： cassandra.py 项目： mliroz/bigdata_dpy

    def start(self):
        """Start Cassandra on all the hosts.

        Logs a warning and returns if Cassandra is already flagged as
        running. Otherwise runs the ``cassandra`` binary from the bin
        directory on every host and, when that finishes correctly, sets both
        ``running_cassandra`` and ``running``.
        """

        self._check_initialization()

        logger.info("Starting Cassandra")

        if self.running_cassandra:
            logger.warn("Cassandra was already started")
            return

        launcher = TaktukRemote(self.bin_dir + "/cassandra", self.hosts)
        launcher.run()

        if launcher.finished_ok:
            self.running_cassandra = True
            self.running = True
        else:
            logger.warn("Error while starting Cassandra")

示例#51

0

显示文件

    def start_dfs(self):
        """Launch the NameNode and DataNodes.

        Returns early with a warning when dfs is already flagged as running.
        Otherwise ``start-dfs.sh`` is executed on the master over SSH and
        ``running_dfs`` is set if the script finishes correctly.
        """

        self._check_initialization()

        logger.info("Starting HDFS")

        if self.running_dfs:
            logger.warn("Dfs was already started")
            return

        launcher = SshProcess(self.sbin_dir + "/start-dfs.sh", self.master)
        launcher.run()

        if launcher.finished_ok:
            self.running_dfs = True
        else:
            logger.warn("Error while starting HDFS")

示例#52

0

显示文件

文件： cluster_v2.py 项目： sarlam/hadoop_g5k

    def clean_history(self):
        """Remove the job history kept in dfs for the current user.

        Executes ``fs -rm -R`` on the user's ``done_intermediate`` history
        directory. A stopped cluster is started for the operation (a warning
        is logged) and stopped again when it is done.
        """

        logger.info("Cleaning history")

        if self.running:
            temporarily_started = False
        else:
            logger.warn("The cluster needs to be running before cleaning.")
            self.start()
            temporarily_started = True

        login = getpass.getuser()
        hist_dfs_dir = \
            "/tmp/hadoop-yarn/staging/history/done_intermediate/" + login
        self.execute("fs -rm -R " + hist_dfs_dir, verbose=False)

        if temporarily_started:
            self.stop()

示例#53

0

显示文件

    def start_map_reduce(self):
        """Start the JobTracker and TaskTrackers.

        When MapReduce is already flagged as running only a warning is
        logged. Otherwise ``start-mapred.sh`` is run on the master over SSH
        and ``running_map_reduce`` is set if it finishes correctly; a failed
        start is reported with a warning and leaves the flag unset.
        """

        self._check_initialization()

        logger.info("Starting MapReduce")

        if self.running_map_reduce:
            # Already up: report it as such instead of as a start error.
            logger.warn("MapReduce was already started")
            return

        proc = SshProcess(self.sbin_dir + "/start-mapred.sh", self.master)
        proc.run()

        if not proc.finished_ok:
            # Failure path: warn, and do not mark the service as running.
            logger.warn("Error while starting MapReduce")
        else:
            self.running_map_reduce = True

示例#54

0

显示文件

文件： cluster.py 项目： lmolina/hadoop_g5k

    def start_dfs(self):
        """Start HDFS (NameNode and DataNodes) from the master.

        If dfs is already flagged as running, a warning is logged and the
        method returns. Otherwise it runs ``start-dfs.sh`` on the master over
        SSH, setting ``running_dfs`` on success and logging a warning on
        failure.
        """

        self._check_initialization()

        logger.info("Starting HDFS")

        if self.running_dfs:
            logger.warn("Dfs was already started")
            return

        start_script = self.sbin_dir + "/start-dfs.sh"
        dfs_proc = SshProcess(start_script, self.master)
        dfs_proc.run()

        if not dfs_proc.finished_ok:
            logger.warn("Error while starting HDFS")
            return

        self.running_dfs = True

示例#55

0

显示文件

文件： hive.py 项目： rwfazul/hadoop_g5k

    def _copy_base_conf(self):
        """Copy base configuration files to tmp dir.

        A fresh temporary directory (``/tmp/hive-*``) is created and stored
        in ``self.temp_conf_dir``. Every file found in the local base
        configuration directory is copied into it. Each mandatory file that
        was not provided locally is then either fetched from the
        configuration directory of the master, if it is listed there, or
        created as a new XML file in the temporary directory.
        """

        self.temp_conf_dir = tempfile.mkdtemp("", "hive-", "/tmp")
        if os.path.exists(self.local_base_conf_dir):
            base_conf_files = [
                os.path.join(self.local_base_conf_dir, f)
                for f in os.listdir(self.local_base_conf_dir)
            ]
            for f in base_conf_files:
                shutil.copy(f, self.temp_conf_dir)
        else:
            logger.warn(
                "Local conf dir does not exist. Using default configuration")
            base_conf_files = []

        mandatory_files = ["hive-site.xml"]

        # Build a new list instead of aliasing (and mutating) mandatory_files.
        local_names = set(os.path.basename(f) for f in base_conf_files)
        missing_conf_files = [f for f in mandatory_files
                              if f not in local_names]

        # Copy or create mandatory files
        action = SshProcess("ls -1 " + self.conf_dir, self.master)
        action.run()
        # Compare whole file names, one per listing line. A substring test on
        # the raw output would wrongly match e.g. "hive-site.xml.template".
        files_in_conf_dir = set(line.strip()
                                for line in action.stdout.splitlines())

        remote_missing_files = []
        for f in missing_conf_files:
            if f in files_in_conf_dir:
                remote_missing_files.append(os.path.join(self.conf_dir, f))
            else:
                create_xml_file(os.path.join(self.temp_conf_dir, f))

        if remote_missing_files:
            logger.info("Copying missing conf files from master: " +
                        str(remote_missing_files))

            action = Get([self.master], remote_missing_files,
                         self.temp_conf_dir)
            action.run()

示例#56

0

显示文件

文件： mongodb.py 项目： mliroz/bigdata_dpy

    def clean_data(self):
        """Remove the contents of the MongoDB data directory on all hosts.

        A running cluster is stopped first, with a warning.
        """

        if self.running:
            logger.warn("The cluster needs to be stopped before cleaning.")
            self.stop()

        logger.info("Cleaning MongoDB data")

        # NOTE(review): this is only true if the cluster still reports itself
        # as running after the stop above, so presumably no restart happens
        # in the normal case - confirm whether that is intended.
        still_running = bool(self.running)
        if still_running:
            self.stop()

        wipe_cmd = "rm -rf " + self.data_dir + "/*"
        Remote(wipe_cmd, self.hosts).run()

        if still_running:
            self.start()

示例#57

0

显示文件

文件： cluster.py 项目： lmolina/hadoop_g5k

    def clean_data(self):
        """Delete the data created by Hadoop on all hosts.

        Removes the Hadoop temporary directory and the current user's
        ``/tmp/hadoop-<user>-*`` entries on every host. A running cluster is
        stopped first, with a warning.
        """

        if self.running:
            logger.warn("The cluster needs to be stopped before cleaning.")
            self.stop()

        logger.info("Cleaning hadoop data")

        # NOTE(review): this is only true if the cluster still reports itself
        # as running after the stop above, so presumably no restart happens
        # in the normal case - confirm whether that is intended.
        still_running = bool(self.running)
        if still_running:
            self.stop()

        wipe_cmd = ("rm -rf " + self.hadoop_temp_dir + " /tmp/hadoop-" +
                    getpass.getuser() + "-*")
        Remote(wipe_cmd, self.hosts).run()

        if still_running:
            self.start()

示例#58

0

显示文件

    def clean_data(self):
        """Wipe the Hadoop data from every host of the cluster.

        Runs ``rm -rf`` on the Hadoop temporary directory and on the
        ``/tmp/hadoop-<user>-*`` entries of the current user. If the cluster
        is running it is stopped beforehand and a warning is logged.
        """

        if self.running:
            logger.warn("The cluster needs to be stopped before cleaning.")
            self.stop()

        logger.info("Cleaning hadoop data")

        # NOTE(review): the flag is read again after the stop above, so this
        # branch presumably only triggers if that stop did not clear it -
        # confirm whether a restart was meant to happen here.
        if self.running:
            self.stop()
            restart_after = True
        else:
            restart_after = False

        user_glob = " /tmp/hadoop-" + getpass.getuser() + "-*"
        cleaner = Remote("rm -rf " + self.hadoop_temp_dir + user_glob,
                         self.hosts)
        cleaner.run()

        if restart_after:
            self.start()

示例#59

0

显示文件

    def __init__(self,
                 job_path,
                 exec_params=None,
                 app_params=None,
                 lib_paths=None):
        """Create a new Spark job with the given parameters.

        Args:
          job_path (str):
            The local path of the file containing the job binaries.
          exec_params (list of str, optional):
            The list of parameters used in job execution (e.g., driver-memory).
          app_params (list of str, optional):
            The list of parameters of the application.
          lib_paths (list of str, optional):
            The list of local paths to the libraries used by the job.

        Raises:
          SparkJobException:
            If the job binaries file or any of the library files does not
            exist.
        """

        if exec_params is None:
            exec_params = []
        if app_params is None:
            app_params = []
        if lib_paths is None:
            lib_paths = []

        # Check if the jar file exists
        if not os.path.exists(job_path):
            logger.error("Job binaries file " + job_path + " does not exist")
            raise SparkJobException("Job binaries file " + job_path +
                                    " does not exist")

        # Check if the libraries exist. Fail loudly: returning here would
        # hand the caller an object with none of its attributes set.
        for lp in lib_paths:
            if not os.path.exists(lp):
                logger.error("Lib file " + lp + " does not exist")
                raise SparkJobException("Lib file " + lp + " does not exist")

        self.job_path = job_path
        self.exec_params = exec_params
        self.app_params = app_params
        self.lib_paths = lib_paths

示例#60

0

显示文件

    def start_spark(self):
        """Start spark processes.

        In STANDALONE mode the ``start-master.sh`` and ``start-slaves.sh``
        scripts are run on the master over SSH. In YARN mode it only checks
        that the associated Hadoop cluster is running and starts it (waiting
        for it) if not. Nothing is done if Spark is already flagged as
        running.
        """

        logger.info("Starting Spark")

        if self.running:
            logger.warn("Spark was already started")
            return

        if self.mode == STANDALONE_MODE:
            launch_cmd = (self.sbin_dir + "/start-master.sh;" +
                          self.sbin_dir + "/start-slaves.sh;")
            launcher = SshProcess(launch_cmd, self.master)
            launcher.run()
            if not launcher.finished_ok:
                logger.warn("Error while starting Spark")
                return
        elif self.mode == YARN_MODE:
            hadoop_is_up = self.hc.running
            if not hadoop_is_up:
                logger.warn("YARN services must be started first")
                self.hc.start_and_wait()

        self.running = True