def __init__(self, name, flavor_id, image_id, create=False, wait=False, IPv4=False, log_path=LOGS_DIR):
    """
    VM class constructor.
    :param name: the VM name (also used to name its per-VM logfile)
    :param flavor_id: the IaaS flavor id for this VM
    :param image_id: the IaaS image id for this VM
    :param create: if True, also create the VM in the IaaS right away
    :param wait: forwarded to create(); wait for the VM to come up
    :param IPv4: if True, the VM is flagged to get a public IPv4 address
    :param log_path: directory where the per-VM logfile is written
    """
    #set attributes
    self.created = False
    self.name = name
    self.flavor_id = flavor_id
    self.log_path = log_path
    self.image_id = image_id
    self.public_addresses = []
    self.addresses = []
    # -1 means "no IaaS id assigned yet"
    self.id = -1
    self.IPv4 = IPv4
    # BUG FIX: honour the 'log_path' parameter -- it was previously stored
    # but ignored, and the module-level LOGS_DIR was always used instead
    if not exists(self.log_path):
        makedirs(self.log_path)
    self.logfile = "%s/%s.log" % (self.log_path, self.name)
    self.log = get_logger('[%s]' % self.name, 'INFO', logfile=self.logfile)
    if create:
        self.create(wait)
def __init__(self):
    """Set up the clients cluster: its name, node type, save file and logger."""
    super(Clients, self).__init__()
    # this cluster's identity; the name prefixes the VM names
    self.cluster_name = "clients"
    self.node_type = "client"
    # path used to persist and reload the state of the active cluster
    self.save_file = home + ("files/saved_%s_cluster.json" % self.cluster_name)
    # logger for this module, writing to the shared Coordinator logfile
    self.log = get_logger('CLIENTS', 'INFO', logfile=home + 'files/logs/Coordinator.log')
def __init__(self):
    """Initialize the clients cluster: name, node type, save file and logger."""
    super(Clients, self).__init__()
    self.cluster_name = "clients"
    self.node_type = "client"
    # the save file for saving/reloading the active cluster
    self.save_file = home + "files/saved_%s_cluster.json" % self.cluster_name
    # the logger for this file (shared Coordinator logfile)
    self.log = get_logger('CLIENTS', 'INFO', logfile=home + 'files/logs/Coordinator.log')
def __init__(self, monitoring_address, monitoring_port=8649):
    """Remember the Ganglia endpoint, install a logger and a parser, then
    pull an initial snapshot of the metrics."""
    # the Ganglia endpoint to poll
    self.ganglia_host = monitoring_address
    self.ganglia_port = monitoring_port
    # metrics cache; refreshMetrics() fills/updates it
    self.allmetrics = {}
    ## Install logger
    self.my_logger = get_logger("MonitorVMs", "INFO", logfile=LOG_FILENAME)
    # parser object: refreshMetrics() calls its .parse to update the dict
    self.parser = GParser()
    # fetch the first batch of metrics
    self.refreshMetrics()
def __init__(self, monitoring_address, monitoring_port=8649):
    """
    Monitoring endpoint constructor.
    :param monitoring_address: host of the Ganglia monitoring endpoint
    :param monitoring_port: Ganglia port (default 8649)
    """
    self.ganglia_host = monitoring_address
    self.ganglia_port = monitoring_port
    ## Install logger
    self.my_logger = get_logger("MonitorVMs", "INFO", logfile=LOG_FILENAME)
    # cache of all fetched metrics, filled by refreshMetrics()
    self.allmetrics={}
    self.parser = GParser()
    # initialize parser object. in the refreshMetrics function call the .parse of the
    # parser to update the dictionary object.
    self.refreshMetrics()
def get_all_vms(check_active=False):
    """
    Creates VM instances for all the VMs of the user available in the IaaS.
    :param check_active: when True, VMs whose cloud status is not "ACTIVE" are skipped
    :return: a list of VM objects
    """
    log = get_logger("VM [static]", 'INFO')
    log.debug("getting all VMs")
    vms = []
    for vm_id in iaas.get_all_vm_ids():
        vm = VM.vm_from_dict(iaas.get_vm_details(vm_id))
        # keep the VM unless we are filtering on status and it is not active
        if not (check_active and vm.get_cloud_status() != "ACTIVE"):
            vms.append(vm)
    return vms
def __init__(self):
    """Load the prediction parameters from configuration and set up the logger."""
    # how far ahead to project: 10 mins later (12 ticks per minute)
    self.projection_time = pred_vars['projection_time']
    # sampling configuration
    self.use_sampling = pred_vars['use_sampling']
    self.sampling = pred_vars['sampling']
    # input measurements and output predictions files
    self.measurements_file = env_vars['measurements_file']
    self.predictions_file = pred_vars['predictions_file']
    # measurements of latest minutes will be used in regression
    self.latest = pred_vars['use_latest_meas']
    self.degree = pred_vars['regression_degree']
    # store the current minute
    self.curr_min = 0.0
    # Create logger, writing to the shared Coordinator logfile
    log_file = 'files/logs/Coordinator.log'
    self.log = get_logger('Predictor', 'INFO', logfile=log_file)
seeds = [] # the seed node(s) of the casssandra cluster !!! ONLY ONE IS SUPPORTED !!! nodes = [] # the rest of the nodes of the Cassandra cluster stash = [] # list of the Nodes that are available (active) but not used # the name of the cluster is used as a prefix for the VM names cluster_name = env_vars['active_cluster_name'] # the save file for saving/reloading the active cluster save_file = "files/saved_%s_cluster.json" % cluster_name # the flavor and image for the VMs used int the cluster Node.flavor = env_vars["default_flavor"] Node.image = env_vars["cassandra_base_image"] log = get_logger('CASSANDRA', 'DEBUG', logfile='files/logs/Coordinator.log') def create_cluster(worker_count=0): """ Creates a Cassandra Cluster with a single Seed Node and 'worker_count' other nodes :param worker_count: the number of the nodes to create-apart from the seednode """ global nodes, stash, seeds nodes = [] seeds = [] stash = [] #create the seed node seeds.append(Node(cluster_name, node_type="seed", number=0, create=True, IPv4=True)) #create the rest of the nodes for i in range(worker_count):
__author__ = 'cmantas'
from time import sleep
import CassandraCluster as Servers
from ClientsCluster import my_Clients as Clients
from lib.persistance_module import env_vars, home
from Monitoring import MonitorVms
from new_decision_module import RLDecisionMaker as DM
from lib.tiramola_logging import get_logger
from time import time
from os import remove
from threading import Thread

####### STATIC VARS ###############
# module-level logger, writing to the shared Coordinator logfile
my_logger = get_logger('COORDINATOR', 'INFO', logfile=home+'files/logs/Coordinator.log')
my_logger.debug("--------- NEW RUN -----------------")
#the (pending) decision at the present moment
decision = None
# handle to the currently running background workload process, if any
running_process=None
#get the endpoint for the monitoring system (one monitor per sub-cluster)
monitor_clients = MonitorVms(Clients.get_monitoring_endpoint())
monitor_servers = MonitorVms(Servers.get_monitoring_endpoint())
# last error encountered (None while all is well)
error = None
#check if cluster exists -- this module requires an already-created cluster
if Servers.exists():
    my_logger.info( "Cluster exists using it as is")
    #make sure no workload is running
else:
    # cannot proceed without a cluster: abort the whole run
    my_logger.error("Create the cluster first and then run the coordinator")
    exit(-1)
class Cluster(object):
    """Base class for a cluster of VM nodes: keeps the node list and offers
    helpers for running scripts on the nodes and querying their addresses."""

    # the logger for this file
    log = get_logger('CLUSTER', 'DEBUG', logfile=home+'files/logs/Coordinator.log')

    def __init__(self):
        # the Node objects belonging to this cluster
        self.all_nodes = []
        self.log = Cluster.log
        # the name of the cluster is used as a prefix for the VM names
        self.cluster_name = "cluster"
        pass

    @staticmethod
    def wait_proc(proc, node, timeout, log=None):
        """
        Waits for a process to finish running for a given timeout and throws
        an exception if not finished.
        :param proc: the process to join
        :param node: the node the process runs a command on (used in messages)
        :param timeout: seconds to wait before terminating the process
        :param log: optional logger for progress/error output
        :raises Exception: if the process is still alive after the timeout
        """
        proc.join(timeout)
        #check if it has not finished yet fail if so
        if proc.is_alive():
            if not log is None:
                log.error("Timeout occurred for process")
            proc.terminate()
            raise Exception("Script timed out for "+node.name)
        elif not log is None:
            log.debug(node.name+" DONE")

    @staticmethod
    def run_script(script_content, nodes, serial=True, timeout=600, log=None):
        """
        Runs a script to the specified VMs.
        :param script_content: the command(s) to run on each node
        :param nodes: the nodes to run the script on
        :param serial: if True wait for each node in turn, else wait for all at the end
        :param timeout: per-node timeout in seconds
        :param log: optional logger
        :return: None
        """
        if not log is None:
            log.info('Running a script to %d nodes' % len(nodes))
        procs = []
        #start the procs that add the nodes
        for node in nodes:
            p = Process(target=node.run_command, args=(script_content,))
            procs.append(p)
            p.start()
            if serial:
                # if adding in serial, wait each proc
                if not log is None:
                    log.debug("waiting for node #"+node.name)
                Cluster.wait_proc(p, node, timeout)
        if not serial:
            #wait for all the procs to finish in parallel
            if not log is None:
                log.debug("Waiting for all the procs to finish")
            for i in range(len(nodes)):
                Cluster.wait_proc(procs[i], nodes[i], timeout)
        if not log is None:
            log.info("Finished running script")

    def run_to_all(self, script_content, serial=True, timeout=600):
        """
        Runs a script to all the nodes in the cluster.
        :param script_content: the command(s) to run
        :param serial: if True run on one node at a time
        :param timeout: per-node timeout in seconds
        """
        self.run_script(script_content, self.all_nodes, serial, timeout, self.log)

    def wait_everybody(self):
        """ Waits for all the Nodes in the cluster to be SSH-able """
        self.log.info('Waiting for SSH on all nodes')
        for i in self.all_nodes:
            i.wait_ready()

    def bootstrap_cluster(self):
        """
        Runs the necessary bootstrap commands on each of the Seed Node and the
        other nodes, then injects the hosts files.
        """
        for n in self.all_nodes:
            n.bootstrap()
        self.inject_hosts_files()

    def kill_nodes(self):
        """ Runs the kill scripts for all the nodes in the cluster """
        self.log.info("Killing nodes")
        for n in self.all_nodes:
            n.kill()

    def update_hostfiles(self, servers):
        """
        Pushes a YCSB-specific 'hosts' file (one server IP per line) to all
        the nodes in this cluster.
        :param servers: dict of hostname-->IP of the cassandra servers;
                        NOTE: the "cassandra_seednode" entry is deleted from
                        the caller's dict as a side effect
        """
        if not env_vars["update_hostfiles"]:
            self.log.info("Not updtading ycsb client host files")
            return
        self.log.info("updating hostfiles")
        # generate ycsb-specific hosts file text
        host_text = ""
        # the seed node is excluded from the YCSB hosts list
        if "cassandra_seednode" in servers.keys():
            del servers["cassandra_seednode"]
        #generate the "hosts" text for YCSB
        for key, value in servers.iteritems():
            host_text += value+"\n"
        # remove trailing EOL
        host_text = host_text[:-1]
        #DEBUG keep just one host
        #host_text = servers["cassandra_node_01"]
        command = "echo '%s' > /opt/hosts;" % host_text
        self.run_script(command, self.all_nodes, serial=False)

    def get_hosts(self, string=False, private=False):
        """
        Produces a mapping of hostname-->IP for the nodes in the cluster.
        :param string: unused here -- presumably meant to produce an
                       /etc/hosts-style string; TODO confirm against callers
        :param private: if True map to private addresses, else public ones
        :return: a dict of hostnames-->IPs
        """
        hosts = dict()
        for i in self.all_nodes:
            if private:
                hosts[i.name] = i.get_private_addr()
            else:
                hosts[i.name] = i.get_public_addr()
        return hosts

    def node_count(self):
        # the number of nodes currently in this cluster
        return len(self.all_nodes)

    def exists(self):
        # a cluster "exists" when it holds at least one node
        if len(self.all_nodes) == 0:
            return False
        else:
            return True

    def get_monitoring_endpoint(self):
        """ returns the IP of the node that has the monitoring data we want """
        return self.all_nodes[0].get_public_addr()
def __init__(self, cluster):
    """
    RL decision maker constructor.
    :param cluster: the cluster whose size this decision maker controls
    """
    #Create logger (shared Coordinator logfile)
    LOG_FILENAME = 'files/logs/Coordinator.log'
    self.log = get_logger('RLDecisionMaker', 'INFO', logfile=LOG_FILENAME)
    self.log.info("Using 'gain' : " + env_vars['gain'] + " with threshold of " +
                  str(env_vars["decision_threshold"] * 100) + "% and interval: " +
                  str(env_vars['decision_interval']))
    self.log.info("Cluster Size from %d to %d nodes" %
                  (env_vars['min_cluster_size'], env_vars['max_cluster_size']))
    # debug mode uses a fixed fake cluster size instead of the real one
    self.debug = False
    if self.debug:
        self.currentState = 8
    else:
        self.currentState = cluster.node_count()
    self.cluster = cluster
    self.nextState = self.currentState
    # how many metric fetches fit into one decision interval
    self.waitForIt = env_vars['decision_interval'] / env_vars['metric_fetch_interval']
    self.pending_action = None
    self.decision = {"action": "PASS", "count": 0}
    # The policy for getting throughput and latency when computing the reward func.
    # average, centroid
    self.measurementsPolicy = 'centroid'
    self.prediction = env_vars['use_prediction']
    self.predictor = Predictor()
    # used only in simulation!!
    self.countdown = 0
    # A dictionary that will remember rewards and metrics in states previously visited
    self.memory = {}
    for i in range(env_vars["min_cluster_size"], env_vars["max_cluster_size"] + 1):
        self.memory[str(i)] = {}
        #self.memory[str(i)]['V'] = None
        # placeholder for rewards and metrics
        self.memory[str(i)]['r'] = None
        self.memory[str(i)]['arrayMeas'] = None
    # Load any previous statics.
    self.measurementsFile = env_vars["measurements_file"]
    self.trainingFile = env_vars["training_file"]
    self.sumMetrics = {}
    # initialize measurements file; 'with' guarantees the handle is closed
    # even if the write fails (the original bare open/close leaked it on error)
    with open(self.measurementsFile, 'a+') as meas:
        if os.stat(self.measurementsFile).st_size == 0:
            # The file is empty, set the headers for each column.
            meas.write('State\t\tLambda\t\tThroughput\t\tLatency\t\tCPU\t\tTime\n')
    # load training set
    with open(self.trainingFile, 'r+') as meas:
        if os.stat(self.trainingFile).st_size != 0:
            # Read the training set measurements saved in the file.
            next(meas)  # Skip the first line with the headers of the columns
            for line in meas:
                # Skip comments (used in training sets)
                if not line.startswith('###'):
                    m = line.split('\t\t')
                    self.add_measurement(m)
from lib.tiramola_logging import get_logger from os import remove, mkdir, listdir from shutil import move, copy from time import strftime from os.path import isdir, isfile, join, exists, basename from random import random from json import load, dumps, loads from lib.persistance_module import env_vars, reload_env_vars, home from time import sleep from sys import exc_info from ClientsCluster import my_Clients as ClientsCluster import CassandraCluster from VM import Timer ## global logger log = get_logger("EXPERIMENT", 'INFO', logfile=home+'files/logs/Coordinator.log') o_ev = {} measurements_dir = "files/measurements" def list_files(dir_path): """ lists all files (not dirs) in a given directory :param dir_path: :return: """ return [f for f in listdir(dir_path) if isfile(join(dir_path, f))] def wait_get_one(dir_path): """
seeds = [ ] # the seed node(s) of the casssandra cluster !!! ONLY ONE IS SUPPORTED !!! nodes = [] # the rest of the nodes of the Cassandra cluster stash = [] # list of the Nodes that are available (active) but not used # the name of the cluster is used as a prefix for the VM names cluster_name = env_vars['active_cluster_name'] # the save file for saving/reloading the active cluster save_file = "files/saved_%s_cluster.json" % cluster_name # the flavor and image for the VMs used int the cluster Node.flavor = env_vars["default_flavor"] Node.image = env_vars["cassandra_base_image"] log = get_logger('CASSANDRA', 'DEBUG', logfile='files/logs/Coordinator.log') def create_cluster(worker_count=0): """ Creates a Cassandra Cluster with a single Seed Node and 'worker_count' other nodes :param worker_count: the number of the nodes to create-apart from the seednode """ global nodes, stash, seeds nodes = [] seeds = [] stash = [] #create the seed node seeds.append( Node(cluster_name, node_type="seed", number=0, create=True, IPv4=True)) #create the rest of the nodes
def parse_args(): chosen_function = raw_args[1] global args for arg in raw_args[2:]: i = arg.find("=") if i == -1: args[arg] = True else: key = arg[:i] value = arg[i+1:] args[key] = value return chosen_function log = get_logger("CLI", 'INFO') ############################## AVAILABLE ACTIONS ####################################### def info(): print """============== USAGE ================== tiramola hosts tiramola private_hosts tiramola create_cluster nodes=2 clients=2 tiramola bootstrap_cluster used=8 tiramola load_data records=100000 tiramola run_sinusoid target=100 offset=80 period=60 #period time in minutes tiramola add_nodes [count=2] tiramola remove_nodes [count=2] tiramola kill_workload
def __init__(self, cluster):
    """
    RL decision maker constructor.
    :param cluster: the cluster whose size this decision maker controls
    """
    #Create logger (shared Coordinator logfile)
    LOG_FILENAME = 'files/logs/Coordinator.log'
    self.log = get_logger('RLDecisionMaker', 'INFO', logfile=LOG_FILENAME)
    self.log.info("Using 'gain' : " + env_vars['gain'] +" with threshold of "+str(
        env_vars["decision_threshold"]*100) + "% and interval: " + str(env_vars['decision_interval']))
    self.log.info("Cluster Size from %d to %d nodes" % (env_vars['min_cluster_size'], env_vars['max_cluster_size']))
    # debug mode uses a fixed fake cluster size instead of the real one
    self.debug = False
    if self.debug:
        self.currentState = 8
    else:
        self.currentState = cluster.node_count()
    self.cluster = cluster
    self.nextState = self.currentState
    # how many metric fetches fit into one decision interval
    self.waitForIt = env_vars['decision_interval'] / env_vars['metric_fetch_interval']
    self.pending_action = None
    self.decision = {"action": "PASS", "count": 0}
    # The policy for getting throughput and latency when computing the reward func.
    # average, centroid
    self.measurementsPolicy = 'centroid'
    self.prediction = env_vars['use_prediction']
    self.predictor = Predictor()
    # used only in simulation!!
    self.countdown = 0
    # A dictionary that will remember rewards and metrics in states previously visited
    self.memory = {}
    for i in range(env_vars["min_cluster_size"], env_vars["max_cluster_size"] + 1):
        self.memory[str(i)] = {}
        #self.memory[str(i)]['V'] = None
        # placeholder for rewards and metrics
        self.memory[str(i)]['r'] = None
        self.memory[str(i)]['arrayMeas'] = None
    # Load any previous statics.
    self.measurementsFile = env_vars["measurements_file"]
    self.trainingFile = env_vars["training_file"]
    self.sumMetrics = {}
    # initialize measurements file
    meas = open(self.measurementsFile, 'a+')
    if os.stat(self.measurementsFile).st_size == 0:
        # The file is empty, set the headers for each column.
        meas.write('State\t\tLambda\t\tThroughput\t\tLatency\t\tCPU\t\tTime\n')
    meas.close()
    # load training set
    meas = open(self.trainingFile, 'r+')
    if os.stat(self.trainingFile).st_size != 0:
        # Read the training set measurements saved in the file.
        meas.next()  # Skip the first line with the headers of the columns
        for line in meas:
            # Skip comments (used in training sets)
            if not line.startswith('###'):
                m = line.split('\t\t')
                self.add_measurement(m)
    meas.close()
def parse_args(): chosen_function = raw_args[1] global args for arg in raw_args[2:]: i = arg.find("=") if i == -1: args[arg] = True else: key = arg[:i] value = arg[i + 1:] args[key] = value return chosen_function log = get_logger("CLI", 'INFO') ############################## AVAILABLE ACTIONS ####################################### def info(): print """============== USAGE ================== tiramola hosts tiramola private_hosts tiramola create_cluster nodes=2 clients=2 tiramola bootstrap_cluster used=8 tiramola load_data records=100000 tiramola run_sinusoid target=100 offset=80 period=60 #period time in minutes tiramola add_nodes [count=2] tiramola remove_nodes [count=2] tiramola kill_workload
__author__ = 'cmantas' from time import sleep import CassandraCluster as Servers from ClientsCluster import my_Clients as Clients from lib.persistance_module import env_vars, home from Monitoring import MonitorVms from new_decision_module import RLDecisionMaker as DM from lib.tiramola_logging import get_logger from time import time from os import remove from threading import Thread ####### STATIC VARS ############### my_logger = get_logger('COORDINATOR', 'INFO', logfile=home + 'files/logs/Coordinator.log') my_logger.debug("--------- NEW RUN -----------------") #the (pending) decision at the present moment decision = None running_process = None #get the endpoint for the monitoring system monitor_clients = MonitorVms(Clients.get_monitoring_endpoint()) monitor_servers = MonitorVms(Servers.get_monitoring_endpoint()) error = None #check if cluster exists if Servers.exists(): my_logger.info("Cluster exists using it as is") #make sure no workload is running else: