def compute_coorm_slots(planning, excluded_elements=None):
    """Compute the free cores available at every slot boundary of a planning.

    For each start date returned by ``_slots_limits(planning)``, the free
    slots of every host are scanned and the ``nb_cores`` of the host is
    added to the counters of its cluster, of its site and of the global
    ``'grid5000'`` entry.

    Args:
        planning (dict): nested mapping ``site -> cluster -> host ->
            host_planning`` where ``host_planning['free']`` is an iterable
            of ``(start, stop)`` pairs.
        excluded_elements: accepted for interface compatibility; it is not
            used by this function.

    Returns:
        list of tuple: one ``(start, stop, free_cores)`` triple per limit,
        where ``free_cores`` maps ``'grid5000'``, each site and each cluster
        to a number of cores, and ``stop`` is the smallest end date among
        the free slots that were counted (``10**25`` when none was).
    """
    slots = []
    limits = _slots_limits(planning)
    # The list of known clusters does not depend on the loop variables:
    # fetch it once instead of once per cluster and per limit.
    known_clusters = get_g5k_clusters(queues=None)

    for start in limits:
        stop = 10 ** 25
        free_cores = {'grid5000': 0}
        for site, site_planning in planning.items():
            free_cores[site] = 0
            for cluster, cluster_planning in site_planning.items():
                free_cores[cluster] = 0
                if cluster not in known_clusters:
                    continue
                for host, host_planning in cluster_planning.items():
                    for free_slot in host_planning['free']:
                        # NOTE(review): the second test compares the slot
                        # *start* with ``stop``; confirm that the slot end
                        # was not the intended operand.
                        if free_slot[0] <= start and free_slot[0] < stop:
                            # Single attribute lookup shared by the three
                            # counters.
                            nb_cores = get_host_attributes(
                                host)['architecture']['nb_cores']
                            free_cores[cluster] += nb_cores
                            free_cores[site] += nb_cores
                            free_cores['grid5000'] += nb_cores
                            if free_slot[1] < stop:
                                stop = free_slot[1]
        slots.append((start, stop, free_cores))
    return slots
def __init__(self, hosts, topo_list=None):
    """Create a Hadoop topology object assigning each host to the
    corresponding rack.

    Args:
      hosts (list of Host):
        The hosts to be assigned a topology.
      topo_list (list of str, optional):
        The racks to be assigned to each host. len(hosts) should be equal
        to len(topo_list). When it is missing, empty, or of a different
        length, the racks are discovered from the network adapters of each
        host.
    """
    if topo_list:
        if len(hosts) == len(topo_list):
            # Store the same host -> rack mapping as the discovery branch
            # below, so self.topology always has one shape.
            self.topology = dict(zip(hosts, topo_list))
            return
        logger.warning("hosts and topology have not the same length.")

    logger.info("Discovering topology automatically")
    self.topology = {}
    for h in hosts:
        nw_adapters = get_host_attributes(h)[u'network_adapters']
        for nwa in nw_adapters:
            # The adapter carrying the host address names the switch that
            # is used as the rack identifier.
            if (u'network_address' in nwa and
                    nwa[u'network_address'] == h.address):
                self.topology[h] = "/" + nwa[u'switch']
                break
def get_fastest_host(hosts):
    """Return the host with the highest ``node_flops`` reported by the API.

    Attributes are fetched once per cluster (with the first host seen for
    that cluster) and reused for the other hosts of the same cluster.

    Args:
        hosts (iterable): hosts given either as ``Host`` objects or as
            addresses.

    Returns:
        The address of the first host whose cluster has the highest
        ``node_flops`` value, or None when ``hosts`` is empty.
    """
    cluster_flops = {}
    fastest_host = None
    max_flops = -1
    for host in hosts:
        if isinstance(host, Host):
            host = host.address
        cluster = get_host_cluster(host)
        if cluster not in cluster_flops:
            attr = get_host_attributes(host)
            cluster_flops[cluster] = attr['performance']['node_flops']
        flops = cluster_flops[cluster]
        # Strict comparison: on a tie the first host encountered wins.
        if flops > max_flops:
            max_flops = flops
            fastest_host = host
    return fastest_host
def get_fastest_host(hosts):
    """Return the host with the highest ``node_flops`` reported by the API.

    Attributes are fetched once per cluster (with the first host seen for
    that cluster) and reused for the other hosts of the same cluster.

    Args:
        hosts (iterable): hosts given either as ``Host`` objects or as
            addresses.

    Returns:
        The address of the first host whose cluster has the highest
        ``node_flops`` value, or None when ``hosts`` is empty.
    """
    flops_by_cluster = {}
    best_host = None
    best_flops = -1
    for host in hosts:
        if isinstance(host, Host):
            host = host.address
        cluster = get_host_cluster(host)
        if cluster not in flops_by_cluster:
            attr = get_host_attributes(host)
            flops_by_cluster[cluster] = attr['performance']['node_flops']
        flops = flops_by_cluster[cluster]
        # Strict comparison: on a tie the first host encountered wins.
        if flops > best_flops:
            best_flops = flops
            best_host = host
    return best_host
def get_host_cores(cluster):
    """Return ``nb_cores * nb_procs`` for a representative host of *cluster*.

    The last host returned by ``utils.get_cluster_hosts`` is used as the
    reference, on the assumption that all hosts of a cluster are identical.
    """
    cluster_hosts = utils.get_cluster_hosts(cluster)
    reference_host = cluster_hosts[-1]
    architecture = utils.get_host_attributes(reference_host)['architecture']
    nb_procs = architecture['nb_procs']
    nb_cores = architecture['nb_cores']
    # According to the original author this product is the number of cores
    # as reported on the website.
    return nb_cores * nb_procs
def _configure_servers(self, hosts=None):
    """Configure servers and host-dependant parameters.

    The number of task slots and the heap size of each child JVM are
    derived from the attributes of the first host of the list.

    Args:
      hosts (list of Host, optional):
        The list of hosts to take into account in the configuration. If
        not specified, all the hosts of the Hadoop cluster are used. The
        first host of this list is always used as the reference.
    """
    if not hosts:
        hosts = self.hosts

    host_attrs = get_host_attributes(hosts[0])
    num_cores = host_attrs[u'architecture'][u'smt_size']
    # One slot fewer than the reported thread count, but never fewer than
    # one: a single-thread host must not lead to a division by zero.
    num_slots = max(1, num_cores - 1)

    # Floor division keeps the values integral on Python 2 and 3 alike, so
    # the generated "-Xmx<N>m" option never contains a decimal point.
    # 2 GB (2 * 1024 MB) are subtracted from the total before sharing it.
    total_memory_mb = (int(host_attrs[u'main_memory'][u'ram_size']) //
                       (1024 * 1024)) - 2 * 1024
    mem_per_slot_mb = total_memory_mb // num_slots

    core_conf = os.path.join(self.temp_conf_dir, CORE_CONF_FILE)
    mr_conf = os.path.join(self.temp_conf_dir, MR_CONF_FILE)

    replace_in_xml_file(core_conf,
                        "fs.default.name",
                        "hdfs://" + self.master.address + ":" +
                        str(self.hdfs_port) + "/",
                        True)
    replace_in_xml_file(core_conf,
                        "hadoop.tmp.dir",
                        self.hadoop_temp_dir, True)
    replace_in_xml_file(core_conf,
                        "topology.script.file.name",
                        self.conf_dir + "/topo.sh", True)

    replace_in_xml_file(mr_conf,
                        "mapred.job.tracker",
                        self.master.address + ":" +
                        str(self.mapred_port), True)
    replace_in_xml_file(mr_conf,
                        "mapred.tasktracker.map.tasks.maximum",
                        str(num_slots), True)
    replace_in_xml_file(mr_conf,
                        "mapred.tasktracker.reduce.tasks.maximum",
                        str(num_slots), True)

    if mem_per_slot_mb <= 0:
        # Not enough memory on the reference host: leave the JVM options
        # untouched rather than writing a non-positive heap size.
        logger.warning("Memory is negative, no setting")
    else:
        replace_in_xml_file(mr_conf,
                            "mapred.child.java.opts",
                            "-Xmx" + str(mem_per_slot_mb) + "m", True)
def _configure_servers(self, hosts=None):
    """Configure servers and host-dependant parameters.

    The number of task slots and the heap size of each child JVM are
    derived from the attributes of the first host of the list.

    Args:
      hosts (list of Host, optional):
        The list of hosts to take into account in the configuration. If
        not specified, all the hosts of the Hadoop cluster are used. The
        first host of this list is always used as the reference.
    """
    if not hosts:
        hosts = self.hosts

    reference_attrs = get_host_attributes(hosts[0])
    num_cores = reference_attrs[u'architecture'][u'smt_size']
    # One slot fewer than the reported thread count, but never fewer than
    # one: a single-thread host must not lead to a division by zero.
    num_slots = max(1, num_cores - 1)

    # Floor division keeps the values integral on Python 2 and 3 alike, so
    # the generated "-Xmx<N>m" option never contains a decimal point.
    # 2 GB (2 * 1024 MB) are subtracted from the total before sharing it.
    ram_size_mb = int(reference_attrs[u'main_memory'][u'ram_size']) // \
        (1024 * 1024)
    total_memory_mb = ram_size_mb - 2 * 1024
    mem_per_slot_mb = total_memory_mb // num_slots

    core_conf = os.path.join(self.temp_conf_dir, CORE_CONF_FILE)
    mr_conf = os.path.join(self.temp_conf_dir, MR_CONF_FILE)

    # (file, property, value) triples, applied in the original order.
    settings = [
        (core_conf, "fs.default.name",
         "hdfs://" + self.master.address + ":" + str(self.hdfs_port) + "/"),
        (core_conf, "hadoop.tmp.dir", self.hadoop_temp_dir),
        (core_conf, "topology.script.file.name",
         self.conf_dir + "/topo.sh"),
        (mr_conf, "mapred.job.tracker",
         self.master.address + ":" + str(self.mapred_port)),
        (mr_conf, "mapred.tasktracker.map.tasks.maximum", str(num_slots)),
        (mr_conf, "mapred.tasktracker.reduce.tasks.maximum",
         str(num_slots)),
    ]
    for conf_file, prop_name, prop_value in settings:
        replace_in_xml_file(conf_file, prop_name, prop_value, True)

    if mem_per_slot_mb <= 0:
        # Not enough memory on the reference host: leave the JVM options
        # untouched rather than writing a non-positive heap size.
        logger.warning("Memory is negative, no setting")
    else:
        replace_in_xml_file(mr_conf,
                            "mapred.child.java.opts",
                            "-Xmx" + str(mem_per_slot_mb) + "m", True)
def get_CPU_RAM_FLOPS(hosts):
    """Return the number of CPU, the amount of RAM and the flops of hosts.

    Attributes are fetched once per cluster (with the first host seen for
    that cluster) and shared by all the hosts of that cluster.

    Args:
        hosts (iterable): hosts given either as ``Host`` objects or as
            addresses.

    Returns:
        dict: one entry per host address holding ``'CPU'`` (``nb_cores``),
        ``'RAM'`` (``ram_size`` divided by 10**6) and ``'flops'``
        (``node_flops``), plus a ``'TOTAL'`` entry summing ``'CPU'`` and
        ``'RAM'`` over the given hosts. Hosts of the same cluster share the
        same attribute dict object.
    """
    hosts_attr = {'TOTAL': {'CPU': 0, 'RAM': 0}}
    cluster_attr = {}
    total = hosts_attr['TOTAL']
    for host in hosts:
        if isinstance(host, Host):
            host = host.address
        cluster = get_host_cluster(host)
        if cluster not in cluster_attr:
            attr = get_host_attributes(host)
            cluster_attr[cluster] = {
                'CPU': attr['architecture']['nb_cores'],
                'RAM': int(attr['main_memory']['ram_size'] / 10 ** 6),
                'flops': attr['performance']['node_flops']}
        current = cluster_attr[cluster]
        hosts_attr[host] = current
        # Accumulate from the cached values of *this* host's cluster; the
        # raw attributes fetched last may belong to another cluster.
        total['CPU'] += current['CPU']
        total['RAM'] += current['RAM']

    logger.debug(hosts_list(hosts_attr))
    return hosts_attr
def get_CPU_RAM_FLOPS(hosts):
    """Return the number of CPU, the amount of RAM and the flops of hosts.

    Attributes are fetched once per cluster (with the first host seen for
    that cluster) and shared by all the hosts of that cluster.

    Args:
        hosts (iterable): hosts given either as ``Host`` objects or as
            addresses.

    Returns:
        dict: one entry per host address holding ``'CPU'`` (``smt_size``),
        ``'RAM'`` (``ram_size`` divided by 10**6) and ``'flops'``
        (``node_flops``), plus a ``'TOTAL'`` entry summing ``'CPU'`` and
        ``'RAM'`` over the given hosts. Hosts of the same cluster share the
        same attribute dict object.
    """
    hosts_attr = {'TOTAL': {'CPU': 0, 'RAM': 0}}
    cluster_attr = {}
    total = hosts_attr['TOTAL']
    for host in hosts:
        if isinstance(host, Host):
            host = host.address
        cluster = get_host_cluster(host)
        if cluster not in cluster_attr:
            attr = get_host_attributes(host)
            cluster_attr[cluster] = {
                'CPU': attr['architecture']['smt_size'],
                'RAM': int(attr['main_memory']['ram_size'] / 10 ** 6),
                'flops': attr['performance']['node_flops']}
        current = cluster_attr[cluster]
        hosts_attr[host] = current
        # Accumulate from the cached values of *this* host's cluster; the
        # raw attributes fetched last may belong to another cluster.
        total['CPU'] += current['CPU']
        total['RAM'] += current['RAM']

    logger.debug(hosts_list(hosts_attr))
    return hosts_attr
def benchmark_metrics(self):
    """Run the benchmark client and record per-SeD metrics.

    Steps, in order:
      1. remove the previous ``*.bench`` files on every server;
      2. rebuild and set up the client on the client node;
      3. start one benchmark client per server, wait for all of them and
         count the "success" / "error" / "no server found" outputs;
      4. for every server (SeD), store the measured consumption, the
         ``node_flops`` attribute and their ratio in
         ``self.consumption_bench``, ``self.flops_bench`` and
         ``self.flops_watts_bench``, and copy each value to the log
         directory of that server with ``scp``.

    Returns:
        bool: False when the whole run lasted less than 5 seconds (treated
        as a failed execution, step 4 is skipped), True otherwise.
    """
    bench_size = "regular"
    log_repository = "/root/dietg/log/"
    servers = list(self.servers)

    def push_metric(bench_file, value, sed, scp_suffix):
        """Write *value* to *bench_file* and scp the file to *sed*."""
        with open(bench_file, "w") as out:
            out.write(str(value))
            out.write("\n")
        # The file is closed (hence flushed) before being copied.
        os.system("scp " + bench_file + " root@" + sed + ":" +
                  log_repository + scp_suffix)

    logger.info("Clear bench files")
    cmd = "".join(
        "rm " + log_repository + "/" + name + " 2> /dev/null; "
        for name in ("flops_watts.bench", "conso.bench", "flops.bench"))
    Remote(cmd, servers, connection_params=root_connection_params).run()

    # Initialise the clients
    clients = [self.clients]
    logger.info("Initialize client on node %s", clients)
    setup_commands = (
        "cd " + sched_dir + "; make clean && make",
        "cd /root/dietg/; ./set_client.sh",
    )
    for cmd in setup_commands:
        action = Remote(cmd, clients,
                        connection_params=root_connection_params).run()
        for proc in action.processes:
            logger.info(proc.stdout)

    logger.info("Benchmark_metrics")
    logger.info("Another bench just started!")
    start = time.time()

    # One client run per server, all started before any of them is waited
    # for.
    cmd = "cd " + sched_dir + "; ./client_" + bench_size
    running = [
        Remote(cmd, clients,
               connection_params=root_connection_params).start()
        for _ in self.servers]

    nb_diet_success = 0
    nb_diet_error = 0
    nb_diet_nofound = 0
    for action in running:
        action.wait()
        for proc in action.processes:
            pout = proc.stdout
            logger.debug(pout)
            if "no server found" in pout:
                nb_diet_nofound += 1
            elif "diet call error" in pout:
                nb_diet_error += 1
            elif "diet call success" in pout:
                nb_diet_success += 1
    logger.info("All the bench are terminated (success = %s) | "
                "(error =%s) | (no server found = %s)",
                nb_diet_success, nb_diet_error, nb_diet_nofound)
    end = time.time()

    makespan = end - start
    if makespan < 5:
        logger.info("Benchmark has failed to execute! \n"
                    "Another try will occur!")
        return False
    logger.info("Total makespan = %d", makespan)

    resolution = 15

    # Electric consumption_bench metrics / CONSO
    logger.info("Retrieve consumption_bench per SeD")
    for sed in self.servers:
        consumption = get_g5k_api_measures(
            sed, get_host_site(sed), "pdu", start, end, resolution)
        self.consumption_bench[sed] = consumption
        logger.debug("Electric Consumption of %s = %sW (%sJ)",
                     sed, consumption, consumption * makespan)
        # Unlike the two copies below, the scp output is not discarded
        # here.
        push_metric("conso.bench", consumption * makespan, sed, "")

    # Performance / FLOPS
    logger.info("Retrieve FLOPS per SeD")
    for sed in self.servers:
        flops = get_host_attributes(sed)['performance']['node_flops']
        self.flops_bench[sed] = flops
        logger.debug("Flops Number of %s = %s", sed, flops)
        push_metric("flops.bench", flops, sed, " > /dev/null")

    # Flops per Watts
    logger.info("Retrieve FLOPS/Watts per SeD")
    for sed in self.servers:
        ratio = self.flops_bench[sed] / self.consumption_bench[sed]
        self.flops_watts_bench[sed] = ratio
        logger.debug("flops_watt of %s = %s", sed, ratio)
        push_metric("flops_watts.bench", ratio, sed, " > /dev/null")

    logger.info("Benchmark_metrics termine")
    return True
def get_node_name(host):
    """Return the network address of the first adapter of *host* having one.

    The ``'network_adapters'`` attribute of the host is scanned in order
    and the ``'network_address'`` of the first adapter that declares this
    key is returned.

    Returns:
        The network address, or None when no adapter declares one.
    """
    for netadap in get_host_attributes(host)['network_adapters']:
        # ``in`` works on both Python 2 and 3 (``dict.has_key`` is gone in
        # Python 3).
        if 'network_address' in netadap:
            return netadap['network_address']
    return None