Example No. 1
 def _exec_on_node(self, command, machine, log):
     logger.info(log)
     rem = ex.action.Remote(command, machine, connection_params={'user':'******'}).run()
     if rem.ok:
         logger.info("Success")
     else:
         logger.error("Failure")
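
A call sketch for the helper above, as it might be used from inside the same engine class; the command, host and log message are illustrative only.

    # Hypothetical call site inside the engine; `host` would come from the
    # reservation code shown in later examples.
    self._exec_on_node("mkdir -p /tmp/xp_data", host,
                       "Creating the experiment directory on " + host.address)
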
Example No. 2
    def _initialize_conf(self):
        """Merge locally-specified configuration files with default files
        from the distribution."""

        if os.path.exists(self.local_base_conf_dir):
            base_conf_files = [os.path.join(self.local_base_conf_dir, f)
                               for f in os.listdir(self.local_base_conf_dir)]
            for f in base_conf_files:
                shutil.copy(f, self.init_conf_dir)
        else:
            logger.warn(
                "Local conf dir does not exist. Using default configuration")
            base_conf_files = []

        missing_conf_files = self.conf_mandatory_files
        for f in base_conf_files:
            f_base_name = os.path.basename(f)
            if f_base_name in missing_conf_files:
                missing_conf_files.remove(f_base_name)

        logger.info("Copying missing conf files from master: " + str(
            missing_conf_files))

        remote_missing_files = [os.path.join(self.conf_dir, f)
                                for f in missing_conf_files]

        action = Get([self.master], remote_missing_files, self.init_conf_dir)
        action.run()
Example No. 3
def serialize_cluster(cluster_type, cid, cluster_object):
    """Serialize the cluster object. Replace also the linked Hadoop cluster if
    it exists.

    Args:
      cluster_type (str):
        The type of cluster to serialize.
      cid (int):
        The id of the cluster.
      cluster_object:
        The cluster to serialize.
    """

    fname = __get_cluster_file(cluster_type, cid)

    logger.info("Serialize cluster (" + cluster_type + ") in " + fname)

    with open(fname, 'wb') as c_file:
        pickle.dump(cluster_object, c_file)

    if cluster_type != HadoopCluster.get_cluster_type():
        hc_link_fname = __get_hc_link_file(cluster_type, cid)
        if os.path.exists(hc_link_fname):
            with open(hc_link_fname) as link_file:
                hc_id = int(link_file.readline())
            serialize_cluster(HadoopCluster.get_cluster_type(), hc_id,
                              cluster_object.hc)
Example No. 4
    def change_conf(self, params, conf_file=None, default_file=MR_CONF_FILE):
        """Modify Hadoop configuration. This method copies the configuration
        files from the first host of each g5k cluster conf dir into a local
        temporary dir, do all the changes in place and broadcast the new
        configuration files to all hosts.
        
        Args:
          params (dict of str:str):
            The parameters to be changed in the form key:value.
          conf_file (str, optional):
            The file where parameters should be set. If not specified, all
            files are checked for the parameter name and the parameter is set
            in the file where the property is found. If not found, the
            parameter is set in the default file.
          default_file (str, optional): The default conf file where to set the
            parameter if not found. Only applies when conf_file is not set.
        """

        for cluster in self.hw.get_clusters():
            hosts = cluster.get_hosts()

            # Copy conf files from first host in the cluster
            action = Remote("ls " + self.conf_dir + "/*.xml", [hosts[0]])
            action.run()
            output = action.processes[0].stdout

            remote_conf_files = []
            for f in output.split():
                remote_conf_files.append(os.path.join(self.conf_dir, f))

            tmp_dir = "/tmp/mliroz_temp_hadoop/"
            if not os.path.exists(tmp_dir):
                os.makedirs(tmp_dir)

            action = Get([hosts[0]], remote_conf_files, tmp_dir)
            action.run()

            # Do replacements in temp file
            if conf_file:
                f = os.path.join(tmp_dir, conf_file)
                for name, value in params.iteritems():
                    replace_in_xml_file(f, name, value, True)
            else:
                temp_conf_files = [os.path.join(tmp_dir, f) for f in
                                   os.listdir(tmp_dir)]

                for name, value in params.iteritems():
                    for f in temp_conf_files:
                        if replace_in_xml_file(f, name, value):
                            break
                    else:
                        # Property not found - add it in MR_CONF_FILE
                        logger.info("Parameter with name " + name + " has not "
                                    "been found in any conf file. Setting it "
                                    "in " + default_file)
                        f = os.path.join(tmp_dir, default_file)
                        replace_in_xml_file(f, name, value, True)

            # Copy back the files to all hosts
            self._copy_conf(tmp_dir, hosts)
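
A minimal usage sketch for the two modes described in the docstring, assuming the method belongs to the Hadoop cluster class built in the later examples; the cluster handle and the Hadoop property names are illustrative.

    # Hypothetical cluster built as in the HadoopCluster examples further down.
    hc = HadoopCluster(hosts)

    # Explicit target file.
    hc.change_conf({"mapreduce.job.reduces": "4"}, conf_file="mapred-site.xml")

    # Let change_conf look the property up in every *.xml file and fall back
    # to the default file (MR_CONF_FILE) when it is not found anywhere.
    hc.change_conf({"dfs.replication": "2"})
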
Example No. 5
    def change_conf(self, params, conf_file=None, default_file=MR_CONF_FILE):
        """Modify Hadoop configuration. This method copies the configuration
        files from the first host of each g5k cluster conf dir into a local
        temporary dir, do all the changes in place and broadcast the new
        configuration files to all hosts.
        
        Args:
          params (dict of str:str):
            The parameters to be changed in the form key:value.
          conf_file (str, optional):
            The file where parameters should be set. If not specified, all
            files are checked for the parameter name and the parameter is set
            in the file where the property is found. If not found, the
            parameter is set in the default file.
          default_file (str, optional): The default conf file where to set the
            parameter if not found. Only applies when conf_file is not set.
        """

        for cluster in self.hw.get_clusters():
            hosts = cluster.get_hosts()

            # Copy conf files from first host in the cluster
            action = Remote("ls " + self.conf_dir + "/*.xml", [hosts[0]])
            action.run()
            output = action.processes[0].stdout

            remote_conf_files = []
            for f in output.split():
                remote_conf_files.append(os.path.join(self.conf_dir, f))

            tmp_dir = "/tmp/mliroz_temp_hadoop/"
            if not os.path.exists(tmp_dir):
                os.makedirs(tmp_dir)

            action = Get([hosts[0]], remote_conf_files, tmp_dir)
            action.run()

            # Do replacements in temp file
            if conf_file:
                f = os.path.join(tmp_dir, conf_file)
                for name, value in params.iteritems():
                    replace_in_xml_file(f, name, value, True)
            else:
                temp_conf_files = [os.path.join(tmp_dir, f) for f in
                                   os.listdir(tmp_dir)]

                for name, value in params.iteritems():
                    for f in temp_conf_files:
                        if replace_in_xml_file(f, name, value):
                            break
                    else:
                        # Property not found - add it in MR_CONF_FILE
                        logger.info("Parameter with name " + name + " has not "
                                    "been found in any conf file. Setting it "
                                    "in " + default_file)
                        f = os.path.join(tmp_dir, default_file)
                        replace_in_xml_file(f, name, value, True)

            # Copy back the files to all hosts
            self._copy_conf(tmp_dir, hosts)
Example No. 6
    def initial_state(self, outdir=None):
        """ Convert the dict given from parameters to Numpy array """
        logger.info(style.log_header('Initial boxes configuration\n') +
                    ''.ljust(8) +
                    ''.join([style.emph(box.rjust(10))
                             for box in self.Boxes.iterkeys()]) +
                    style.object_repr('\n' + 'Delta'.ljust(8)) +
                    ''.join([str(box['Delta']).rjust(10)
                             for box in self.Boxes.itervalues()]) +
                    style.object_repr('\n' + 'Mass'.ljust(8)) +
                    ''.join([str(box['Mass']).rjust(10)
                             for box in self.Boxes.itervalues()])
                    )
        if outdir is None:
            outdir = self.result_dir + '/'
        self.plot_state(self.Boxes.keys(),
                        array([box['Delta']
                               for box in self.Boxes.itervalues()]),
                        name='_initial', outdir=outdir)

        self._Mass = array([box['Mass']
                            for box in self.Boxes.itervalues()])
        self._Flux = array([box.values()
                            for box in self.Flux.values()])
        self._Partcoeff = array([box.values()
                                 for box in self.Partcoeff.values()])

        f = open(outdir + '/Delta.initial', 'w')
        for box, value in self.Boxes.iteritems():
            f.write(box + ' ' + str(value['Delta']) + '\n')
        f.close()
        return [box['Delta'] for box in self.Boxes.itervalues()]
Example No. 7
def install_os(reconfigure, tags=None):
    update_config_state()

    # Clone or pull Kolla
    if os.path.isdir('kolla'):
        logger.info("Remove previous Kolla installation")
        kolla_path = os.path.join(SCRIPT_PATH, "kolla")
        call("rm -rf %s" % kolla_path, shell=True)

    logger.info("Cloning Kolla")
    call("cd %s ; git clone %s -b %s > /dev/null" % (SCRIPT_PATH, KOLLA_REPO, KOLLA_BRANCH), shell=True)

    logger.warning("Patching kolla, this should be \
            deprecated with the new version of Kolla")

    playbook = os.path.join(SCRIPT_PATH, "ansible/patches.yml")
    inventory_path = os.path.join(SYMLINK_NAME, 'multinode')
    run_ansible([playbook], inventory_path, STATE['config'])

    kolla_cmd = [os.path.join(SCRIPT_PATH, "kolla", "tools", "kolla-ansible")]

    if reconfigure:
        kolla_cmd.append('reconfigure')
    else:
        kolla_cmd.append('deploy')

    kolla_cmd.extend(["-i", "%s/multinode" % SYMLINK_NAME,
                  "--passwords", "%s/passwords.yml" % SYMLINK_NAME,
                  "--configdir", "%s" % SYMLINK_NAME])

    if tags is not None:
        kolla_cmd.extend(["--tags", tags])

    call(kolla_cmd)
Example No. 8
 def __init__(self):
     """ Add options for the number of measures, migration bandwidth, number of nodes
     walltime, env_file or env_name, stress, and clusters and initialize the engine """
     super(BoxModel, self).__init__()
     self.init_plots()
     logger.setLevel('INFO')
     logger.info(set_style('\n\n                 Welcome to the human isotopic Box Model\n', 'log_header'))
Example No. 9
    def _run_or_abort(self,
                      cmd,
                      host,
                      error_message,
                      tear_down=True,
                      conn_params=None):
        """Attempt to run a command on the given host. If the command fails,
		error_message and the process error output will be printed.

		In addition, if tear_down is True, the tear_down() method will be
		called and the process will exit with return code 1"""

        if conn_params:
            p = EX.SshProcess(cmd, host, conn_params)
        else:
            p = EX.SshProcess(cmd, host)
        p.run()

        if p.exit_code != 0:
            logger.warn(error_message)

            if p.stderr is not None:
                logger.warn(p.stderr)

            logger.info(' '.join(p.cmd))

            if tear_down:
                self.tear_down()
                exit(1)
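
An illustrative call, with a made-up command and error message:

    # Hypothetical: abort the whole experiment if the directory cannot be created.
    self._run_or_abort("mkdir -p /tmp/xp", host,
                       "Could not create the experiment directory on " +
                       host.address, tear_down=True)
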
Example No. 10
    def __force_clean(self):
        """Stop previous Hive processes (if any) and remove all remote files
        created by it."""

        hive_processes = []

        force_kill = False
        for h in self.hosts:
            proc = SshProcess("jps", self.master)
            proc.run()

            ids_to_kill = []
            for line in proc.stdout.splitlines():
                field = line.split()
                if field[1] in hive_processes:
                    ids_to_kill.append(field[0])

            if ids_to_kill:
                force_kill = True
                ids_to_kill_str = ""
                for pid in ids_to_kill:
                    ids_to_kill_str += " " + pid

                proc = SshProcess("kill -9" + ids_to_kill_str, h)
                proc.run()

        if force_kill:
            logger.info(
                "Processes from previous hadoop deployments had to be killed")

        self.clean_logs()
Example No. 11
    def _copy_base_conf(self):
        """Copy base configuration files to tmp dir."""

        self.temp_conf_dir = tempfile.mkdtemp("", "spark-", "/tmp")
        if os.path.exists(self.local_base_conf_dir):
            base_conf_files = [
                os.path.join(self.local_base_conf_dir, f)
                for f in os.listdir(self.local_base_conf_dir)
            ]
            for f in base_conf_files:
                shutil.copy(f, self.temp_conf_dir)
        else:
            logger.warn(
                "Local conf dir does not exist. Using default configuration")
            base_conf_files = []

        mandatory_files = []

        missing_conf_files = mandatory_files
        for f in base_conf_files:
            f_base_name = os.path.basename(f)
            if f_base_name in missing_conf_files:
                missing_conf_files.remove(f_base_name)

        logger.info("Copying missing conf files from master: " +
                    str(missing_conf_files))

        remote_missing_files = [
            os.path.join(self.conf_dir, f) for f in missing_conf_files
        ]

        action = Get([self.master], remote_missing_files, self.temp_conf_dir)
        action.run()
Example No. 12
    def __force_clean(self):
        """Stop previous Spark processes (if any) and remove all remote files
        created by it."""

        spark_processes = ["Master", "Worker"]

        force_kill = False
        for h in self.hosts:
            proc = SshProcess("jps", h)
            proc.run()

            ids_to_kill = []
            for line in proc.stdout.splitlines():
                field = line.split()
                if field[1] in spark_processes:
                    ids_to_kill.append(field[0])

            if ids_to_kill:
                force_kill = True
                ids_to_kill_str = ""
                for pid in ids_to_kill:
                    ids_to_kill_str += " " + pid

                logger.warn("Killing running Spark processes in host %s" %
                            style.host(h.address.split('.')[0]))

                proc = SshProcess("kill -9" + ids_to_kill_str, h)
                proc.run()

        if force_kill:
            logger.info(
                "Processes from previous hadoop deployments had to be killed")

        self.clean_logs()
Example No. 13
    def start_spark(self):
        """Start spark processes.
        In STANDALONE mode it starts the master and slaves. In YARN mode it just
        checks that Hadoop is running, and starts it if not.
        """

        logger.info("Starting Spark")

        if self.running:
            logger.warn("Spark was already started")
            return

        if self.mode == STANDALONE_MODE:
            proc = SshProcess(
                self.sbin_dir + "/start-master.sh;" + self.sbin_dir +
                "/start-slaves.sh;", self.master)
            proc.run()
            if not proc.finished_ok:
                logger.warn("Error while starting Spark")
                return
        elif self.mode == YARN_MODE:
            if not self.hc.running:
                logger.warn("YARN services must be started first")
                self.hc.start_and_wait()

        self.running = True
Example No. 14
    def start_spark(self):
        """Start spark processes.
        In STANDALONE mode it starts the master and slaves. In YARN mode it just
        checks that Hadoop is running, and starts it if not.
        """

        logger.info("Starting Spark")

        if self.running:
            logger.warn("Spark was already started")
            return

        if self.mode == STANDALONE_MODE:
            proc = SshProcess(self.sbin_dir + "/start-master.sh;" +
                              self.sbin_dir + "/start-slaves.sh;",
                              self.master)
            proc.run()
            if not proc.finished_ok:
                logger.warn("Error while starting Spark")
                return
        elif self.mode == YARN_MODE:
            if not self.hc.running:
                logger.warn("YARN services must be started first")
                self.hc.start_and_wait()

        self.running = True
Example No. 15
 def __init__(self):
     """Initialize the execo engine"""
     super(IsotopicBoxModel, self).__init__()
     self.init_plots()
     logger.info(style.log_header('\n\n                 Welcome to the ' +
                                  'human isotopic Box Model\n'))
     logger.debug(pformat(self.__dict__))
Example No. 16
def serialize_cluster(cluster_type, cid, cluster_object):
    """Serialize the cluster object. Replace also the linked Hadoop cluster if
    it exists.

    Args:
      cluster_type (str):
        The type of cluster to serialize.
      cid (int):
        The id of the cluster.
      cluster_object:
        The cluster to serialize.
    """

    fname = __get_cluster_file(cluster_type, cid)

    logger.info("Serialize cluster (" + cluster_type + ") in " + fname)

    with open(fname, 'wb') as c_file:
        pickle.dump(cluster_object, c_file)

    if cluster_type != HadoopCluster.get_cluster_type():
        hc_link_fname = __get_hc_link_file(cluster_type, cid)
        if os.path.exists(hc_link_fname):
            with open(hc_link_fname) as link_file:
                hc_id = int(link_file.readline())
            serialize_cluster(HadoopCluster.get_cluster_type(), hc_id,
                              cluster_object.hc)
Example No. 17
    def get_host(self):
        """Returns the hosts from an existing reservation (if any), or from
		a new reservation"""

        # Look if there is a running job
        self.site = get_cluster_site(self.config['cluster'])
        jobs = EX5.get_current_oar_jobs([self.site])

        self.job_id = None
        for t in jobs:
            if EX5.get_oar_job_info(
                    t[0], self.site)['name'] == self.options.job_name:
                self.job_id = t[0]
                break

        if self.job_id:
            logger.info('Using job %s' % style.emph(self.job_id))
        else:
            logger.info('Making a new reservation')
            self._make_reservation(self.site)

        if not self.job_id:
            logger.error("Could not get a reservation for the job")
            exit(6)

        EX5.wait_oar_job_start(self.job_id, self.site)

        pp(EX5.get_oar_job_nodes(self.job_id, self.site))
        return EX5.get_oar_job_nodes(self.job_id, self.site)[0]
Example No. 18
    def run(self):
        # Go to the result folder before everything
        os.chdir(self.result_dir)

        # jobs = [(_jobID, _site)]
        # Get nodes
        # nodes = get_oar_job_nodes(_jobID, _site)

        try:
            # logger.info("Creating hostfiles for all combinations...")
            # for nbr_node in _nbrNodes:
            #     hostfile_filename = self.result_dir + '/' + 'hostfile-' + nbr_node
            #     with open(hostfile_filename, 'w') as hostfile:
            #         for node in nodes[:int(nbr_node)]:
            #             print>>hostfile, node.address

            spack_command = 'spack install -v chameleon@trunk+starpu+fxt ^starpu@svn-trunk+fxt'
            # spack_process = Remote(spack_command, nodes)
            logger.info("Starting StarPU installation...")
            spack_process = Process(spack_command).start()            

            spack_process.wait()
            logger.info("StarPU installation DONE...")
            if not spack_process.ok:
                logger.info("Error: " + spack_process.error_reason)
            else:
                logger.info("spack stdout: {}".format(spack_process.stdout))
            spack_process.kill()

            # Pilotage
        except:
            traceback.print_exc()
        finally:
	        logger.info("Fin...")
Example No. 19
    def _copy_xp_output(self):
        """Copy experiment's output."""

        if self.output_path:
            remote_path = self.macro_manager.test_macros["xp.output"]  # TODO: what happens if not specified?
            local_path = os.path.join(self.output_path, str(self.comb_id))
            logger.info("Copying output to " + local_path)

            tmp_dir = "/tmp"

            # Remove file in tmp dir if exists
            proc = SshProcess("rm -rf " +
                              os.path.join(tmp_dir, os.path.basename(remote_path)),
                              self.hc.master)
            proc.run()

            # Get files in master
            self.hc.execute("fs -get " + remote_path + " " + tmp_dir,
                            verbose=False)

            # Copy files from master
            action = Get([self.hc.master],
                         [os.path.join(tmp_dir, os.path.basename(remote_path))],
                         local_path)
            action.run()
Example No. 20
    def _initialize_conf(self):
        """Merge locally-specified configuration files with default files
        from the distribution."""

        if os.path.exists(self.local_base_conf_dir):
            base_conf_files = [
                os.path.join(self.local_base_conf_dir, f)
                for f in os.listdir(self.local_base_conf_dir)
            ]
            for f in base_conf_files:
                shutil.copy(f, self.init_conf_dir)
        else:
            logger.warn(
                "Local conf dir does not exist. Using default configuration")
            base_conf_files = []

        missing_conf_files = self.conf_mandatory_files
        for f in base_conf_files:
            f_base_name = os.path.basename(f)
            if f_base_name in missing_conf_files:
                missing_conf_files.remove(f_base_name)

        logger.info("Copying missing conf files from master: " +
                    str(missing_conf_files))

        remote_missing_files = [
            os.path.join(self.conf_dir, f) for f in missing_conf_files
        ]

        action = Get([self.master], remote_missing_files, self.init_conf_dir)
        action.run()
Example No. 21
    def __init__(self, hosts, topo_list=None):
        """Create a Hadoop topology object assigning each host to the
        corresponding rack.

        Args:
          hosts (list of Host):
            The hosts to be assigned a topology.
          topo_list (list of str, optional):
            The racks to be assigned to each host. len(hosts) should be equal to
            len(topo_list).
        """

        if topo_list:
            if len(hosts) == len(topo_list):
                self.topology = topo_list
                return
            else:
                logger.warn("hosts and topology have not the same length.")

        logger.info("Discovering topology automatically")
        self.topology = {}
        for h in hosts:
            nw_adapters = get_host_attributes(h)[u'network_adapters']
            for nwa in nw_adapters:
                if (u'network_address' in nwa and
                            nwa[u'network_address'] == h.address):
                    self.topology[h] = "/" + nwa[u'switch']
                    break
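
A construction sketch, assuming execo Host objects; the node names and rack labels are placeholders.

    from execo import Host

    hosts = [Host("parapluie-1.rennes.grid5000.fr"),
             Host("parapluie-2.rennes.grid5000.fr")]

    # Explicit mapping: one rack label per host (same length as hosts).
    topo = HadoopTopology(hosts, topo_list=["/rack1", "/rack1"])

    # Automatic discovery through the Grid'5000 API (get_host_attributes).
    topo = HadoopTopology(hosts)
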
Example No. 22
    def __init__(self, hosts, topo_list=None):
        """Create a Hadoop topology object assigning each host to the
        corresponding rack.

        Args:
          hosts (list of Host):
            The hosts to be assigned a topology.
          topo_list (list of str, optional):
            The racks to be assigned to each host. len(hosts) should be equal to
            len(topo_list).
        """

        if topo_list:
            if len(hosts) == len(topo_list):
                self.topology = topo_list
                return
            else:
                logger.warn("hosts and topology have not the same length.")

        logger.info("Discovering topology automatically")
        self.topology = {}
        for h in hosts:
            nw_adapters = get_host_attributes(h)[u'network_adapters']
            for nwa in nw_adapters:
                if (u'network_address' in nwa
                        and nwa[u'network_address'] == h.address):
                    self.topology[h] = "/" + nwa[u'switch']
                    break
Example No. 23
    def __force_clean(self):
        """Stop previous Hive processes (if any) and remove all remote files
        created by it."""

        hive_processes = []

        force_kill = False
        for h in self.hosts:
            proc = SshProcess("jps", self.master)
            proc.run()

            ids_to_kill = []
            for line in proc.stdout.splitlines():
                field = line.split()
                if field[1] in hive_processes:
                    ids_to_kill.append(field[0])

            if ids_to_kill:
                force_kill = True
                ids_to_kill_str = ""
                for pid in ids_to_kill:
                    ids_to_kill_str += " " + pid

                proc = SshProcess("kill -9" + ids_to_kill_str, h)
                proc.run()

        if force_kill:
            logger.info(
                "Processes from previous hadoop deployments had to be killed")

        self.clean_logs()
Example No. 24
    def _copy_base_conf(self):
        """Copy base configuration files to tmp dir."""

        self.temp_conf_dir = tempfile.mkdtemp("", "hadoop-", "/tmp")
        if os.path.exists(self.local_base_conf_dir):
            base_conf_files = [os.path.join(self.local_base_conf_dir, f)
                               for f in os.listdir(self.local_base_conf_dir)]
            for f in base_conf_files:
                shutil.copy(f, self.temp_conf_dir)
        else:
            logger.warn(
                "Local conf dir does not exist. Using default configuration")
            base_conf_files = []

        mandatory_files = [CORE_CONF_FILE, HDFS_CONF_FILE, MR_CONF_FILE]

        missing_conf_files = mandatory_files
        for f in base_conf_files:
            f_base_name = os.path.basename(f)
            if f_base_name in missing_conf_files:
                missing_conf_files.remove(f_base_name)

        logger.info("Copying missing conf files from master: " + str(
            missing_conf_files))

        remote_missing_files = [os.path.join(self.conf_dir, f)
                                for f in missing_conf_files]

        action = Get([self.master], remote_missing_files, self.temp_conf_dir)
        action.run()
Example No. 25
    def load(self):
        """Load the configuration file"""

        # Load the configuration file
        try:
            with open(self.config_path) as config_file:
                config = yaml.load(config_file)
        except:
            logger.error("Error reading configuration file %s" %
                         self.config_path)
            t, value, tb = sys.exc_info()
            print("%s %s" % (str(t), str(value)))
            sys.exit(23)

        # Load g5k networks
        with open(NETWORK_FILE) as network_file:
            self.networks = yaml.load(network_file)


        self.config = {}
        self.config.update(DEFAULT_CONFIG)
        self.config.update(config)

        logger.info("Configuration file loaded : %s" % self.config_path)
        logger.info(pf(self.config))

        return self.config
Example No. 26
    def _copy_xp_output(self):
        """Copy experiment's output."""

        if self.output_path:
            remote_path = self.macro_manager.test_macros[
                "xp.output"]  # TODO: what happens if not specified?
            local_path = os.path.join(self.output_path, str(self.comb_id))
            logger.info("Copying output to " + local_path)

            tmp_dir = "/tmp"

            # Remove file in tmp dir if exists
            proc = SshProcess(
                "rm -rf " +
                os.path.join(tmp_dir, os.path.basename(remote_path)),
                self.hc.master)
            proc.run()

            # Get files in master
            self.hc.execute("fs -get " + remote_path + " " + tmp_dir,
                            verbose=False)

            # Copy files from master
            action = Get(
                [self.hc.master],
                [os.path.join(tmp_dir, os.path.basename(remote_path))],
                local_path)
            action.run()
Example No. 27
def boot_vms_by_core(vms):
    """ """
    n_vm = len(vms)
    if n_vm == 0:
        return True
    if isinstance(vms[0]['host'], Host):
        host = vms[0]['host'].address.split('.')[0]
    else:
        host = vms[0]['host'].split('.')[0]

    sub_vms = {}
    for i_core in list(set(vm['cpuset'] for vm in vms)):
        sub_vms[i_core] = list()
        for vm in vms:
            if vm['cpuset'] == i_core:
                sub_vms[i_core].append(vm)
    booted_vms = 0
    while len(sub_vms.keys()) > 0:
        vms_to_boot = []
        for i_core in sub_vms.keys():
            vms_to_boot.append(sub_vms[i_core][0])
            sub_vms[i_core].pop(0)
            if len(sub_vms[i_core]) == 0:
                del sub_vms[i_core]

        logger.info(style.Thread(host) + ': Starting VMS ' +
                    ', '.join([vm['id'] for vm in sorted(vms_to_boot)]))
        start_vms(vms_to_boot).run()
        booted = wait_vms_have_started(vms_to_boot)
        if not booted:
            return False
        booted_vms += len(vms_to_boot)
        logger.info(style.Thread(host)+': '+style.emph(str(booted_vms)+'/'+str(n_vm)))
    return True
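
The dicts consumed above look roughly like the sketch below (keys taken from the code, values illustrative); 'host' may be an execo Host or a plain host name, as the two branches at the top of the function show.

    vms = [{"id": "vm-1", "host": Host("paravance-1.rennes.grid5000.fr"), "cpuset": 0},
           {"id": "vm-2", "host": "paravance-1.rennes.grid5000.fr", "cpuset": 1}]
    boot_vms_by_core(vms)
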
Example No. 28
    def prepare_dataset(self, comb):
        """Prepare the dataset to be used in the next set of experiments.

        Args:
          comb (dict): The combination containing the dataset's parameters.

        Returns:
          dict: The dataset parameters.

        """

        # Create ds_comb
        (ds_class_name, ds_params) = self.comb_manager.get_ds_class_params(comb)

        local_path = ds_params["local_path"]
        remote_path = os.path.join(self.div_p2p.remote_dir,
                                   os.path.basename(local_path))

        ds_comb = {"ds.class.path": remote_path, "ds.class": ds_class_name}

        # Copy dataset to host
        logger.info(self._th_prefix() + "Prepare dataset with combination " +
                    str(self.comb_manager.get_ds_parameters(comb)))

        copy_code = TaktukPut([self.div_p2p.host], [local_path], remote_path)
        copy_code.run()

        # Notify stats manager
        self.stats_manager.add_ds(self.ds_id, comb)

        return ds_comb
Example No. 29
 def plot_state(self, boxes, deltas, name = '', outdir = None):
     """ Make a graph of a given state """
     graph = Dot(graph_type='digraph', fontname="Verdana", size="10, 5", fixedsize=True)
     i_box = 0
     for box in boxes:
         textcolor = 'white' if sum([self.color_chars.index(col)
                                     for col in self.plots_conf[box]['color'].split('#')[1]]) < 35 else 'black'
         node_box = Node(box, style="filled",
                         label='<<font POINT-SIZE="10" color="' + textcolor + '">' + box + '<br/> ' +
                               "%.7f" % round(deltas[i_box], 7) + '</font>>',
                         fillcolor=self.plots_conf[box]['color'],
                         shape=self.plots_conf[box]['shape'])
         i_box += 1
         graph.add_node(node_box)

     for box_from, boxes_to in self.Flux.iteritems():
         for box_to, flux in boxes_to.iteritems():
             if flux != 0:
                 if flux > 0:
                     edge = Edge(box_from, box_to, label='<<font POINT-SIZE="10">' + str(flux) + '</font>>')
                 elif flux < 0:
                     edge = Edge(box_to, box_from, label='<<font POINT-SIZE="10">' + str(flux) + '</font>>')
                 graph.add_edge(edge)

     if outdir is None:
         outdir = self.result_dir

     outfile = outdir + '/state' + name + '.png'
     graph.write_png(outfile)
     logger.info('State has been saved to ' + set_style(outfile, 'emph'))
Example No. 30
    def __init__(self, hosts, topo_list=None, config_file=None):
        """Create a new Hadoop cluster with the given hosts and topology.
        
        Args:
          hosts (list of Host):
            The hosts to be assigned a topology.
          topo_list (list of str, optional):
            The racks to be assigned to each host. len(hosts) should be equal to
            len(topo_list).
          config_file (str, optional):
            The path of the config file to be used.
        """

        # Load cluster properties
        config = ConfigParser(self.defaults)
        config.add_section("cluster")
        config.add_section("local")

        if config_file:
            config.readfp(open(config_file))

        self.base_dir = config.get("cluster", "hadoop_base_dir")
        self.conf_dir = config.get("cluster", "hadoop_conf_dir")
        self.logs_dir = config.get("cluster", "hadoop_logs_dir")
        self.hadoop_temp_dir = config.get("cluster", "hadoop_temp_dir")
        self.hdfs_port = config.getint("cluster", "hdfs_port")
        self.mapred_port = config.getint("cluster", "mapred_port")
        self.local_base_conf_dir = config.get("local", "local_base_conf_dir")

        self.bin_dir = self.base_dir + "/bin"
        self.sbin_dir = self.base_dir + "/bin"

        # Configure master and slaves
        self.hosts = hosts
        self.master = hosts[0]

        # Create topology
        self.topology = HadoopTopology(hosts, topo_list)
        # Store cluster information
        self.host_clusters = {}
        for h in self.hosts:
            g5k_cluster = get_host_cluster(h)
            if g5k_cluster in self.host_clusters:
                self.host_clusters[g5k_cluster].append(h)
            else:
                self.host_clusters[g5k_cluster] = [h]

        # Create a string to display the topology
        t = {v: [] for v in self.topology.topology.values()}
        for key, value in self.topology.topology.iteritems():
            t[value].append(key.address)
        log_topo = ', '.join([style.user2(k) + ': ' +
                              ' '.join(map(lambda x: style.host(x.split('.')[0]), v)) 
                              for k, v in t.iteritems()])
        
        logger.info("Hadoop cluster created with master %s, hosts %s and topology %s",
                    style.host(self.master.address), 
                    ' '.join([style.host(h.address.split('.')[0]) for h in self.hosts]),
                    log_topo)
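
Given the ConfigParser options read above, a matching configuration file might look like the sketch below (paths and ports are illustrative), passed to the constructor via config_file.

    # hadoop.cfg -- every option below mirrors a config.get()/getint() call above.
    #
    # [cluster]
    # hadoop_base_dir = /opt/hadoop
    # hadoop_conf_dir = /opt/hadoop/conf
    # hadoop_logs_dir = /opt/hadoop/logs
    # hadoop_temp_dir = /tmp/hadoop
    # hdfs_port = 54310
    # mapred_port = 54311
    #
    # [local]
    # local_base_conf_dir = ./conf

    hosts = EX5.get_oar_job_nodes(job_id, site)       # as in the reservation example
    hc = HadoopCluster(hosts, config_file="hadoop.cfg")
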
Example No. 31
    def __init__(self, hosts, topo_list=None, config_file=None):
        """Create a new Hadoop cluster with the given hosts and topology.
        
        Args:
          hosts (list of Host):
            The hosts to be assigned a topology.
          topo_list (list of str, optional):
            The racks to be assigned to each host. len(hosts) should be equal to
            len(topo_list).
          config_file (str, optional):
            The path of the config file to be used.
        """

        # Load cluster properties
        config = ConfigParser(self.defaults)
        config.add_section("cluster")
        config.add_section("local")

        if config_file:
            config.readfp(open(config_file))

        self.base_dir = config.get("cluster", "hadoop_base_dir")
        self.conf_dir = config.get("cluster", "hadoop_conf_dir")
        self.logs_dir = config.get("cluster", "hadoop_logs_dir")
        self.hadoop_temp_dir = config.get("cluster", "hadoop_temp_dir")
        self.hdfs_port = config.getint("cluster", "hdfs_port")
        self.mapred_port = config.getint("cluster", "mapred_port")
        self.local_base_conf_dir = config.get("local", "local_base_conf_dir")

        self.bin_dir = self.base_dir + "/bin"
        self.sbin_dir = self.base_dir + "/bin"

        # Configure master and slaves
        self.hosts = hosts
        self.master = hosts[0]

        # Create topology
        self.topology = HadoopTopology(hosts, topo_list)
        # Store cluster information
        self.host_clusters = {}
        for h in self.hosts:
            g5k_cluster = get_host_cluster(h)
            if g5k_cluster in self.host_clusters:
                self.host_clusters[g5k_cluster].append(h)
            else:
                self.host_clusters[g5k_cluster] = [h]

        # Create a string to display the topology
        t = {v: [] for v in self.topology.topology.values()}
        for key, value in self.topology.topology.iteritems():
            t[value].append(key.address)
        log_topo = ', '.join([style.user2(k) + ': ' +
                              ' '.join(map(lambda x: style.host(x.split('.')[0]), v)) 
                              for k, v in t.iteritems()])
        
        logger.info("Hadoop cluster created with master %s, hosts %s and topology %s",
                    style.host(self.master.address), 
                    ' '.join([style.host(h.address.split('.')[0]) for h in self.hosts]),
                    log_topo)
Example No. 32
 def define_parameters(self):
     """ """
     parameters = self.get_parameters("conf.xml")
     sweeps = sweep(parameters)
     self.sweeper = ParamSweeper(os.path.join(self.result_dir, "sweeps"),
                                 sweeps)
     logger.info('Number of parameters combinations %s',
                 len(self.sweeper.get_remaining()))
Example No. 33
def update_config_state():
    """
    Update STATE['config'] with the config file options
    """
    config_file = STATE['config_file']
    with open(config_file, 'r') as f:
        STATE['config'].update(yaml.load(f))
    logger.info("Reloaded config %s", STATE['config'] )
Example No. 34
def update_config_state():
    """
    Update STATE['config'] with the config file options
    """
    config_file = STATE['config_file']
    with open(config_file, 'r') as f:
        STATE['config'].update(yaml.load(f))
    logger.info("Reloaded config %s", STATE['config'])
Example No. 35
    def _remove_xp_output(self):
        """Remove experiment's output."""

        if self.remove_output:
            logger.info("Remove output")
            self.hc.execute(
                "fs -rmr " + self.macro_manager.test_macros["xp.output"],
                verbose=False)  # TODO: what happens if not specified?
Example No. 36
    def _remove_xp_output(self):
        """Remove experiment's output."""

        if self.remove_output:
            logger.info("Remove output")
            self.hc.execute("fs -rmr " +
                            self.macro_manager.test_macros["xp.output"],
                            verbose=False)  # TODO: what happens if not specified?
Example No. 37
    def _copy_xp_stats(self):
        """Copy job stats and clean them in the cluster."""

        if self.stats_path:
            local_path = os.path.join(self.stats_path, str(self.comb_id))
            logger.info("Copying stats to " + local_path)
            self.hc.stop()
            self.hc.copy_history(local_path)
            self.hc.clean_history()
Example No. 38
 def submit_job(self, comb):
     """Use the batch script"""
     logger.info('Submitting job on ' + jobserver)
     comb_dir = parent_dir + slugify(comb) + '/'
     job_sub = sp.Popen('cd ' + comb_dir +
                          ' ; /usr/local/bin/qsub /home/stephane/ExamplePBS/batch_single',
                          shell=True,
                          stdout=sp.PIPE, stderr=sp.STDOUT)
     return job_sub.stdout.readlines()[-1].split('.')[0]
Example No. 39
    def setup_result_dir(self):
        is_a_test = self.options.is_a_test

        run_type = ""
        if is_a_test:
            run_type = "test_"
        self.result_dir = script_path + '/' + run_type + 'results_' + \
            time.strftime("%Y-%m-%d--%H-%M-%S")
        logger.info('result directory: {}'.format(self.result_dir))
Example No. 40
    def setup_result_dir(self):
        is_a_test = self.options.is_a_test

        run_type = ""
        if is_a_test:
            run_type = "test_"
        self.result_dir = script_path + '/' + run_type + 'results_' + \
            time.strftime("%Y-%m-%d--%H-%M-%S")
        logger.info('result directory: {}'.format(self.result_dir))
Example No. 41
    def _copy_xp_stats(self):
        """Copy job stats and clean them in the cluster."""

        if self.stats_path:
            local_path = os.path.join(self.stats_path, str(self.comb_id))
            logger.info("Copying stats to " + local_path)
            self.hc.stop()
            self.hc.copy_history(local_path)
            self.hc.clean_history()
Example No. 42
 def create_paramsweeper(self):
     """Generate an iterator over combination parameters"""
     if self.parameters is None:
         parameters = self.define_parameters()
     else:
         parameters = self.parameters
     logger.detail(pformat(parameters))
     sweeps = sweep(parameters)
     logger.info('%s combinations', len(sweeps))
     self.sweeper = ParamSweeper(path.join(self.result_dir, "sweeps"),
                                 sweeps)
Example No. 43
 def create_paramsweeper(self):
     """Generate an iterator over combination parameters"""
     if self.parameters is None:
         parameters = self.define_parameters()
     else:
         parameters = self.parameters
     logger.detail(pformat(parameters))
     sweeps = sweep(parameters)
     logger.info('%s combinations', len(sweeps))
     self.sweeper = ParamSweeper(path.join(self.result_dir, "sweeps"),
                                 sweeps)
Example No. 44
    def submit_job(self, comb):
        """Use the batch script on psmn"""
        logger.info('Submit job on '+ jobserver)
        comb_dir = parent_dir + slugify(comb) + '/'
        job_sub = SshProcess('cd ' + comb_dir +
                             ' ; /usr/local/bin/qsub /home/stephane/ExamplePBS/batch_single',
                             jobserver).run()

        return job_sub.stdout.splitlines()[-1].split('.')[0]
Example No. 45
    def stop(self):
        self._check_initialization()

        logger.info("Stopping Cassandra")

        self.running_cassandra = False
        self.running = False

        pass
Example No. 46
    def execute_job(self, job, node=None, verbose=True):
        """Execute the given Spark job in the specified node.

        Args:
          job (SparkJob):
            The job object.
          node (Host, optional):
            The host where the command should be executed. If not provided,
            self.master is chosen.
          verbose (bool, optional):
            If True stdout and stderr of remote process is displayed.

        Returns (tuple of str):
          A tuple with the standard and error outputs of the process executing
          the job.
        """

        if not self.running:
            logger.warn("The cluster was stopped. Starting it automatically")
            self.start()

        if node is None:
            node = self.master

        exec_dir = "/tmp"

        # Copy necessary files to cluster
        files_to_copy = job.get_files_to_copy()
        action = Put([node], files_to_copy, exec_dir)
        action.run()

        # Get command
        command = job.get_command(exec_dir)

        # Execute
        logger.info("Executing spark job. Command = {" + self.bin_dir +
                    "/spark-submit " + command + "} in " + str(node))

        proc = SshProcess(self.bin_dir + "/spark-submit " + command, node)

        if verbose:
            red_color = '\033[01;31m'

            proc.stdout_handlers.append(sys.stdout)
            proc.stderr_handlers.append(
                ColorDecorator(sys.stderr, red_color))

        proc.start()
        proc.wait()

        # Get job info
        job.stdout = proc.stdout
        job.stderr = proc.stderr
        job.success = (proc.exit_code == 0)

        return proc.stdout, proc.stderr
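
A usage sketch; spark_cluster and job stand in for a started Spark cluster and a SparkJob built elsewhere (both hypothetical here).

    out, err = spark_cluster.execute_job(job)      # runs spark-submit on the master
    if job.success:
        logger.info("Spark job finished successfully")
    else:
        logger.warn("Spark job failed:\n" + err)
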
Example No. 47
    def define_parameters(self):
        nbNodes = len(self.cluster)
        # build parameters and make nbCore list per benchmark
        freqList = [2534000, 2000000, 1200000]
        n_nodes = float(len(self.cluster))
        max_core = SshProcess('cat /proc/cpuinfo | grep -i processor |wc -l',
                              self.cluster[0],
                              connection_params={
                                  'user': '******'
                              }).run().stdout
        max_core = n_nodes * float(max_core)
        even = filter(
            lambda i: i > n_nodes,
            list(takewhile(lambda i: i < max_core,
                           (2**i for i in count(0, 1)))))
        powerTwo = filter(
            lambda i: i > n_nodes,
            list(takewhile(lambda i: i < max_core,
                           (i**2 for i in count(0, 1)))))

        # Define parameters
        self.parameters = {
            'Repeat': [1],
            "Freq": [2534000],
            "NPBclass": ['C'],
            "Benchmark": {
                # 'ft': {
                #     'n_core': even
                #     },
                # 'ep': {
                #     'n_core': even
                #     },
                # 'lu': {
                #     'n_core': even
                #     },
                # 'is': {
                #     'n_core': even
                #     },
                # 'sg': {
                #     'n_core': even
                #     },
                # 'bt': {
                #     'n_core': powerTwo
                #     },
                'sp': {
                    'n_core': powerTwo
                }
            }
        }

        logger.info(self.parameters)
        # make all possible parameters object,
        self.sweeper = ParamSweeper(os.path.join(self.result_dir, "sweeps"),
                                    sweep(self.parameters))
        logger.info('Number of parameters combinations %s',
                    len(self.sweeper.get_remaining()))
Example No. 48
    def workflow(self, comb):
        self.create_par_file(comb)
        job_id = self.submit_job(comb)
        logger.info('Combination %s will be treated by job %s',
                    slugify(comb), str(job_id))

        while self.is_job_running(job_id):
            sleep(10)

        self.sweeper.done(comb)
Example No. 49
 def _get_primary_vlan(self):
     """
     Returns the primary vlan
     It's the vlan where nodes are put when deploying
     """
     vlan = None
     if len(self.vlans) > 0:
         vlan = self.vlans[0]
         logger.info("Using vlan %s" % str(vlan))
     return vlan
Example No. 50
    def execute_job(self, job, node=None, verbose=True):
        """Execute the given Spark job in the specified node.

        Args:
          job (SparkJob):
            The job object.
          node (Host, optional):
            The host where the command should be executed. If not provided,
            self.master is chosen.
          verbose (bool, optional):
            If True stdout and stderr of remote process is displayed.

        Returns (tuple of str):
          A tuple with the standard and error outputs of the process executing
          the job.
        """

        if not self.running:
            logger.warn("The cluster was stopped. Starting it automatically")
            self.start()

        if node is None:
            node = self.master

        exec_dir = "/tmp"

        # Copy necessary files to cluster
        files_to_copy = job.get_files_to_copy()
        action = Put([node], files_to_copy, exec_dir)
        action.run()

        # Get command
        command = job.get_command(exec_dir)

        # Execute
        logger.info("Executing spark job. Command = {" + self.bin_dir +
                    "/spark-submit " + command + "} in " + str(node))

        proc = SshProcess(self.bin_dir + "/spark-submit " + command, node)

        if verbose:
            red_color = '\033[01;31m'

            proc.stdout_handlers.append(sys.stdout)
            proc.stderr_handlers.append(ColorDecorator(sys.stderr, red_color))

        proc.start()
        proc.wait()

        # Get job info
        job.stdout = proc.stdout
        job.stderr = proc.stderr
        job.success = (proc.exit_code == 0)

        return proc.stdout, proc.stderr
Example No. 51
    def clean_data(self):
        """Remove all data used by Hive"""

        logger.info("Cleaning data")

        # Warehouse
        self.hc.execute("fs -rm -r /user/hive/warehouse", verbose=False)

        # Metastore
        # TODO
        shutil.rmtree(self.metastore_dir)
Example No. 52
    def define_parameters(self):
        """ """
        parameters = {
            'blas': ['none', 'mkl', 'atlas', 'openblas'],
            'experiment': ['aevol', 'raevol'],
            'compilator': ['gcc', 'intel'],
            'parallel': ['openmp', 'tbb']
        }
        sweeps = sweep(parameters)
        self.sweeper = ParamSweeper(os.path.join(self.result_dir, "sweeps"), sweeps)
        logger.info('Number of parameters combinations %s', len(self.sweeper.get_remaining()))
Example No. 53
    def _create_warehouse(self):
        """ """

        if not self.hc.running:
            logger.warn("Hadoop must be started first")
            self.hc.start_and_wait()

        logger.info("Creating warehouse dirs in HDFS")
        self.hc.execute("fs -mkdir -p /tmp", verbose=False)
        self.hc.execute("fs -mkdir -p /user/hive/warehouse", verbose=False)
        self.hc.execute("fs -chmod g+w /tmp", verbose=False)
        self.hc.execute("fs -chmod g+w /user/hive/warehouse", verbose=False)
Example No. 54
def run_bench(output_folder, node):
    # debian part
    logger.info("Starting debian benchs...")
    debian_folder = create_subdir(output_folder, "debian")
    debian_bench_command = "../../tools/benchs.sh \"commands/omp-tasks/debian-omp-tasks\" {} 10 \"1 2 4 8 16\"".format(debian_folder)
    debian_bench = Remote('cd ./unikernel-tools/benchs/bots && {}'.format(debian_bench_command), node).run()

    # hermitux part
    logger.info("Starting hermitux benchs...")
    hermitux_folder = create_subdir(output_folder, "hermitux")
    hermitux_bench_command = "../../tools/benchs.sh \"commands/omp-tasks/hermitux-omp-tasks\" {} 10 \"1 2 4 8 16\"".format(hermitux_folder)
    hermitux_bench = Remote('cd ./unikernel-tools/benchs/bots && {}'.format(hermitux_bench_command), node).run()
Example No. 55
    def build_roles(self):
        """
        Returns a dict that maps each role to a list of G5K nodes::

          { 'controller': [paravance-1, paravance-5], 'compute': [econome-1] }
        """
        def mk_pools():
            "Indexes each node by its cluster to construct pools of nodes."
            pools = {}
            for cluster, nodes in groupby(
                    self.deployed_nodes, lambda node: node.address.split('-')[0]):
                pools.setdefault(cluster, []).extend(list(nodes))

            return pools

        def pick_nodes(pool, n):
            "Picks n node in a pool of nodes."
            nodes = pool[:n]
            del pool[:n]
            return nodes

        # Maps a role (eg, controller) with a list of G5K node
        roles_set = set()
        for roles in self.config['resources'].values():
            roles_set.update(roles.keys())
        roles = {k: [] for k in roles_set}
        roles_goal = {k: 0 for k in roles_set}

        # compute the aggregated number of nodes per roles
        for r in self.config['resources'].values():
            for k,v in r.items():
                roles_goal[k] = roles_goal[k] + v

        pools = mk_pools()
        for cluster, rs in self.config['resources'].items():
            current = pick_nodes(pools[cluster], 1)
            # distribute node into roles
            for r in rs.keys() * len(self.deployed_nodes):
                if current == []:
                    break
                if current != [] and len(roles[r]) < roles_goal[r]:
                    roles.setdefault(r, []).extend(current)
                    current = pick_nodes(pools[cluster], 1)

        logger.info("Roles: %s" % pf(roles))
        at_least_one = all(len(n) >= 1 for n in roles.values())
        if not at_least_one:
            # Even if we aren't in strict mode we guarantee that
            # there will be at least one node per role
            raise Exception("Role doesn't have at least one node each")

        return roles
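
The docstring shows the output; the self.config['resources'] structure the loops above expect maps each Grid'5000 cluster to a role-to-count dict (cluster names and counts below are illustrative).

    resources = {
        "paravance": {"controller": 2, "compute": 1},
        "econome": {"compute": 1},
    }
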
Example No. 56
    def run(self):
        """ """
        try:
            # Creation of the main iterator which is used for the first control loop.
            self.define_parameters()

            # While there are combinations to treat
            while len(self.sweeper.get_remaining()) > 0:
                comb = self.sweeper.get_next()
                if comb:
                    self.workflow(comb)
        finally:
            logger.info("Compilation DONE")
Example No. 57
    def xp(self, comb):
        comb_ok = False
        try:
            """ tout ton xp """

            comb_ok = True
        finally:
            if comb_ok:
                self.sweeper.done(comb)
            else:
                self.sweeper.cancel(comb)
            logger.info(style.step('%s Remaining'),
                        len(self.sweeper.get_remaining()))
    def create_paramsweeper(self):
        """Test all the sites, with or without a KaVLAN and for several env."""
        params = {
            "version": ['kadeploy3-dev', 'kadeploy3'],
            "kavlan": [True, False],
            "site": get_g5k_sites(),
            "n_nodes": [1, 4, 10],
            "env": ['wheezy-x64-base', 'wheezy-x64-prod', 'wheezy-x64-xen']
        }
        logger.info('Defining parameters: %s', pformat(params))

        combs = sweep(params)
        return ParamSweeper(self.result_dir + "/sweeper", combs)
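
A hypothetical driver loop for the sweeper returned above; it assumes the engine stores the sweeper as self.sweeper, since xp() marks combinations done or cancelled through that attribute.

    engine.sweeper = engine.create_paramsweeper()
    comb = engine.sweeper.get_next()
    while comb:
        engine.xp(comb)                 # xp() calls done()/cancel() itself
        comb = engine.sweeper.get_next()
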