def get_data(self, node):
    """Return telemetry data for the specified node.

    :param node: InfoGraph node
    :return: pandas.DataFrame (empty on query failure)
    """
    queries = InfoGraphNode.get_queries(node)
    ret_val = pandas.DataFrame()
    try:
        ret_val = self._get_data(queries)
    except Exception:
        # Bug fix: format() was closed after its first argument, so the
        # "{} {} {}" template raised IndexError inside the handler.  Use
        # format_exc() so the traceback goes to the log, not stdout.
        LOG.debug("Exception in user code: \n{} {} {}".format(
            '-' * 60, traceback.format_exc(), '-' * 60))
    if InfoGraphNode.node_is_vm(node):
        if not ret_val.empty:
            # VM telemetry columns carry extra prefixes; normalize them.
            ret_val.columns = tm_utils.clean_vm_telemetry_colnames(
                ret_val.columns)
    return ret_val
def run(self, workload, optimal_node_type='machine'):
    """Rank nodes of *optimal_node_type* by resource utilization.

    :param workload: Contains workload related info and results.
    :param optimal_node_type: node type to rank (default 'machine').
    :return: pandas.DataFrame of heuristic results, sorted by the
        configured utilization fields; nodes without telemetry are
        appended (unsorted) at the end.
    :raises KeyError: if the workload has no graph to process.
    """
    workload_config = workload.get_configuration()
    graph = workload.get_latest_graph()
    if not graph:
        raise KeyError('No graph to be processed.')
    scores = LandscapeScore.utilization_scores(graph)
    scores_sat = LandscapeScore.saturation_scores(graph)
    heuristic_results = pd.DataFrame(columns=[
        'node_name', 'type', 'ipaddress',
        'compute utilization', 'compute saturation',
        'memory utilization', 'memory saturation',
        'network utilization', 'network saturation',
        'disk utilization', 'disk saturation',
    ])
    # Separate frame for nodes that have no telemetry data attached.
    heuristic_results_nt = heuristic_results.copy()
    device_id_col_name = None
    project = None
    if workload_config.get('project'):
        project = workload_config['project']
        device_id_col_name = project + '_device_id'
        heuristic_results[device_id_col_name] = None
    telemetry_filter = workload_config.get('telemetry_filter')
    for node in graph.nodes(data=True):
        node_name = InfoGraphNode.get_name(node)
        node_type = InfoGraphNode.get_type(node)
        list_node_name = node_name
        if node_type == optimal_node_type:
            if InfoGraphNode.node_is_vm(node):
                # Prefer the human-readable VM name when available.
                vm_name = InfoGraphNode.get_properties(node).get('vm_name')
                if vm_name:
                    list_node_name = vm_name
            data = {
                'node_name': list_node_name,
                'type': node_type,
                'ipaddress':
                    InfoGraphNode.get_attributes(node).get('ipaddress'),
                'compute utilization': scores[node_name]['compute'],
                'compute saturation': scores_sat[node_name]['compute'],
                'memory utilization': scores[node_name]['memory'],
                'memory saturation': scores_sat[node_name]['memory'],
                'network utilization': scores[node_name]['network'],
                'network saturation': scores_sat[node_name]['network'],
                'disk utilization': scores[node_name]['disk'],
                'disk saturation': scores_sat[node_name]['disk'],
            }
            if device_id_col_name:
                dev_id = InfoGraphNode.get_properties(node).get(
                    device_id_col_name)
                # mf2c device ids use '-' instead of '_'.  Bug fix: guard
                # against a missing id (dev_id may be None).
                if project == 'mf2c' and dev_id:
                    dev_id = dev_id.replace('_', '-')
                data[device_id_col_name] = dev_id
            if InfoGraphNode.get_properties(node).get(
                    "telemetry_data") is not None:
                heuristic_results = heuristic_results.append(
                    data, ignore_index=True)
            elif not telemetry_filter:
                # Bug fix: previously appended to heuristic_results and
                # assigned the result to heuristic_results_nt, which both
                # duplicated telemetry rows and discarded any no-telemetry
                # rows collected so far.
                heuristic_results_nt = heuristic_results_nt.append(
                    data, ignore_index=True)
        if not workload.get_workload_name().startswith('optimal_'):
            # Docker containers are ranked by their docker id when machines
            # are the optimal node type; saturation is not scored for them.
            if (InfoGraphNode.get_type(node) == "docker_container"
                    and optimal_node_type == 'machine'):
                node_name = InfoGraphNode.get_docker_id(node)
                heuristic_results = heuristic_results.append(
                    {
                        'node_name': node_name,
                        'type': node_type,
                        'ipaddress': None,
                        'compute utilization': scores[node_name]['compute'],
                        'compute saturation': None,
                        'memory utilization': scores[node_name]['memory'],
                        'memory saturation': None,
                        'network utilization': scores[node_name]['network'],
                        'network saturation': None,
                        'disk utilization': scores[node_name]['disk'],
                        'disk saturation': None,
                    },
                    ignore_index=True)
    # Build the sort key list from the configured order, defaulting to CPU.
    sort_fields = ['compute utilization']
    sort_order = workload_config.get('sort_order')
    if sort_order:
        field_map = {
            'cpu': 'compute utilization',
            'memory': 'memory utilization',
            'network': 'network utilization',
            'disk': 'disk utilization',
        }
        sort_fields = [field_map[val] for val in sort_order
                       if val in field_map]
    heuristic_results_nt = heuristic_results_nt.replace([0], [None])
    heuristic_results = heuristic_results.sort_values(
        by=sort_fields, ascending=True)
    heuristic_results = heuristic_results.append(
        heuristic_results_nt, ignore_index=True)
    workload.append_metadata(self.__filter_name__, heuristic_results)
    LOG.info('AVG: {}'.format(heuristic_results))
    return heuristic_results
def run(self, workload):
    """Merge the workload's service subgraphs into one graph.

    Telemetry data from all subgraph nodes is collected (keyed by node id,
    or VM name for VM nodes), the subgraphs are merged, and the combined
    telemetry is re-attached to the merged graph so that only one copy of
    each node's data exists.

    :param workload: Contains workload related info and results.
    :return: the merged graph, or None if there was nothing to merge.
    """
    service_subgraphs = workload.get_latest_graph()
    telemetry = {}
    if not service_subgraphs:
        return None
    # First pass: gather telemetry from every node into `telemetry`,
    # concatenating frames for nodes that appear in several subgraphs.
    print("Data merger started " + str(time.time()))
    for subgraph in service_subgraphs:
        for node in subgraph.nodes(data=True):
            node_id = node[0]
            node_tm = InfoGraphNode.get_telemetry_data(node)
            if InfoGraphNode.node_is_vm(node):
                if not node_tm.empty:
                    node_tm.columns = tm_utils.clean_vm_telemetry_colnames(
                        node_tm.columns)
                # Key VM telemetry by VM name so entries from different
                # subgraphs line up.
                vm_name = InfoGraphNode.get_attributes(node).get("vm_name")
                if vm_name:
                    node_id = vm_name
            if not node_tm.empty:
                tm = telemetry.get(node_id)
                if not isinstance(tm, pd.DataFrame):
                    telemetry[node_id] = node_tm
                else:
                    telemetry[node_id] = pd.concat([tm, node_tm])
            # Clear per-subgraph telemetry so only the merged copy survives.
            InfoGraphNode.set_telemetry_data(node, pd.DataFrame())
    print("Data merger finished " + str(time.time()))
    print(telemetry.keys())
    print(len(telemetry))
    # Second pass: merge all non-empty subgraphs into a single graph.
    graph = None
    for subgraph in service_subgraphs:
        if not graph and len(subgraph.nodes()) > 0:
            graph = subgraph
        elif len(subgraph.nodes()) > 0:
            graphs.merge_graph(graph, subgraph)
    if graph is None:
        # Bug fix: every subgraph was empty — previously this fell through
        # to graph.nodes() and raised AttributeError on None.
        return None
    # Third pass: attach the merged telemetry to the merged graph's nodes.
    for node in graph.nodes(data=True):
        node_id = node[0]
        if InfoGraphNode.node_is_vm(node):
            vm_name = InfoGraphNode.get_attributes(node).get("vm_name")
            if vm_name:
                node_id = vm_name
        tm = telemetry.get(node_id)
        if isinstance(tm, pd.DataFrame):
            if not tm.empty:
                InfoGraphNode.set_telemetry_data(node, tm)
                # Drop the dict entry so only one copy exists in the graph.
                del telemetry[node_id]
        else:
            InfoGraphNode.set_telemetry_data(node, None)
    print("Set telemetry data of merged graph")
    workload.save_results(self.__filter_name__, graph)
    return graph