def destroy(self):
    """Tear down the monitoring stack.

    Removes the Grafana, Telegraf and InfluxDB containers, as well as
    the InfluxDB data volume on the collector host.
    """
    # UI host: remove Grafana.
    with play_on(pattern_hosts="ui", roles=self._roles) as p:
        p.docker_container(
            display_name="Destroying Grafana",
            name="grafana",
            state="absent",
            force_kill=True,
        )
    # Agent hosts: remove the Telegraf collectors.
    with play_on(pattern_hosts="agent", roles=self._roles) as p:
        p.docker_container(
            display_name="Destroying telegraf",
            name="telegraf",
            state="absent",
        )
    # Collector host: remove InfluxDB and its data volume.
    with play_on(pattern_hosts="collector", roles=self._roles) as p:
        p.docker_container(
            display_name="Destroying InfluxDB",
            name="influxdb",
            state="absent",
            force_kill=True,
        )
        p.docker_volume(
            display_name="Destroying associated volumes",
            name="influxdb-data",
            state="absent",
        )
def _get_cpus(self):
    """Retrieve cpu info of all sensored hosts and put it in dictionaries.

    Populates ``self.cpuname_to_cpu`` (cpu model -> CPU) and
    ``self.hostname_to_cpu`` (host -> CPU) from the output of ``lscpu``
    fetched from every 'sensors' host.
    """
    # Lazy loading: dictionaries already populated on a previous call.
    if self.hostname_to_cpu:
        return
    local_lscpus = './_tmp_enos_/lscpus'
    remote_lscpu = 'tmp/lscpu'
    ## #1 remove outdated data (is_dir() is False when the path is absent)
    local_root = Path(local_lscpus)
    if local_root.is_dir():
        shutil.rmtree(local_root)
    ## #2 retrieve new data: run lscpu remotely then fetch one file per host
    with play_on(pattern_hosts='sensors', roles=self._roles) as p:
        p.shell(f'lscpu > /{remote_lscpu}')
        p.fetch(
            display_name='Retrieving the result of lscpu…',
            src=f'/{remote_lscpu}',
            dest=f'{local_lscpus}',
            flat=False,
        )
    # flat=False keeps one sub-directory per host; parse each lscpu dump.
    for host_dir in local_root.iterdir():
        cpu = CPU((host_dir / remote_lscpu).resolve())
        cpu.get_cpu()
        self.cpuname_to_cpu[cpu.cpu_name] = cpu
        self.hostname_to_cpu[host_dir.name] = cpu
def config(env=None):
    """Write the current date on every host, then print the local copy.

    Note: ``env`` must be a dict-like with a "roles" key; the default
    ``None`` would raise a TypeError if actually used.
    """
    hosts = env["roles"]
    with play_on(pattern_hosts="all", roles=hosts) as p:
        p.shell("date > /tmp/date")
    with open("/tmp/date") as f:
        print(f.readlines())
def provision(rs: Roles):
    """Provision the OpenStack hosts: packages, IP forwarding, root SSH, bash."""
    ensure_python3(roles=rs)
    with play_on(roles=rs, pattern_hosts="OpenStack") as p:
        # Install the bare necessities
        p.apt(pkg=['bat', 'curl', 'htop', 'tcpdump', 'lynx', 'vim', 'kmod'],
              update_cache=True)
        # Workaround ripgrep error
        # https://bugs.launchpad.net/ubuntu/+source/rust-bat/+bug/1868517
        p.raw('apt download ripgrep')
        p.raw('dpkg --force-overwrite -i ripgrep*.deb')
        # IP Forwarding
        p.raw('sysctl -w net.ipv4.ip_forward=1')
        # Setup ssh for root w/ password
        p.raw('echo "root:lab-os" | chpasswd')
        p.blockinfile(path='/etc/ssh/sshd_config', block='''
        PasswordAuthentication yes
        PermitRootLogin yes
        ''')
        p.systemd(name='ssh', state='restarted')
        # Enhance the default bash
        bashrc_lines = (
            '. /etc/bash_completion',            # Offer bash completion
            'export PATH=/snap/bin:${PATH}',     # Put /snap/bin in PATH
            'alias cat="bat --style=plain"',     # Better cat
            'alias fgrep="rg --fixed-strings"',  # Better fgrep
        )
        for line in bashrc_lines:
            p.lineinfile(path='/root/.bashrc', line=line)
def destroy(self):
    """Destroy the energy monitoring stack.

    Removes the Grafana, sensor, SmartWatts, MongoDB and InfluxDB
    containers on their respective hosts.
    """
    # SmartWatts container names depend on the cpu inventory.
    self._get_cpus()
    with play_on(pattern_hosts="grafana", roles=self._roles) as p:
        p.docker_container(
            display_name="Destroying Grafana…",
            name="grafana",
            state="absent",
            force_kill=True,
        )
    with play_on(pattern_hosts="sensors", roles=self._roles) as p:
        p.docker_container(
            display_name="Destroying sensors…",
            name="powerapi-sensor",
            state="absent",
            force_kill=True,
        )
    # BUG FIX: the original incremented with `++i`, which in Python is a
    # double unary plus (a no-op statement), so the index never advanced
    # and every SmartWatts container was looked up on formulas[0].
    # enumerate() provides a real, advancing index.
    for i, cpu in enumerate(self.cpuname_to_cpu.values()):
        formula_host = self._get_address(self.formulas[i % len(self.formulas)])
        with play_on(pattern_hosts=formula_host, roles=self._roles) as p:
            p.docker_container(
                display_name="Destroying SmartWatts…",
                name=self._get_smartwatts_name(cpu),
                state="absent",
                force_kill=True,
            )
    with play_on(pattern_hosts="mongos", roles=self._roles) as p:
        p.docker_container(
            display_name="Destroying MongoDBs…",
            name="mongodb",
            state="absent",
            force_kill=True,
        )
    with play_on(pattern_hosts="influxdbs", roles=self._roles) as p:
        p.docker_container(
            display_name="Destroying InfluxDBs…",
            name="influxdb",
            state="absent",
            force_kill=True,
        )
def deploy(self):
    """Deploy Skydive: install pip prerequisites, then run the playbook."""
    # Some requirements
    with play_on(pattern_hosts="all", roles=self.roles) as p:
        p.apt(
            display_name="[Preinstall] Installing python-pip",
            name=["python3", "python-pip", "python3-pip"],
            state="present",
            update_cache=True,
        )
        p.pip(display_name="[Preinstall] Installing pyyaml", name="pyyaml")
    # Hand over to the dedicated ansible playbook.
    playbook = os.path.join(SERVICE_PATH, "skydive", "skydive.yml")
    run_ansible([playbook], roles=self.roles, extra_vars=self.extra_vars)
def bootstrap(rs: Roles):
    """Bootstrap the OpenStack hosts: base packages, root SSH access, PATH."""
    ensure_python3(roles=rs)
    with play_on(roles=rs, pattern_hosts="OpenStack") as p:
        # Install the bare necessities
        p.apt(pkg=['silversearcher-ag', 'curl', 'htop', 'tcpdump',
                   'lynx', 'vim', 'kmod'])
        # Setup ssh for root w/ password
        p.raw('echo "root:os-imt" | chpasswd')
        p.blockinfile(path='/etc/ssh/sshd_config', block='''
        PasswordAuthentication yes
        PermitRootLogin yes
        ''')
        p.systemd(name='ssh', state='restarted')
        # Put /snap/bin in PATH
        p.lineinfile(path='/root/.bashrc',
                     line='export PATH=/snap/bin:${PATH}')
def backup(self, backup_dir=None):
    """Backup the monitoring stack.

    Args:
        backup_dir (str): path of the backup directory to use.
            Defaults to the current working directory.
    """
    if backup_dir is None:
        backup_dir = Path.cwd()
    # Resolve the destination and make sure it exists.
    dest_dir = _to_abs(backup_dir)
    dest_dir.mkdir(parents=True, exist_ok=True)
    with play_on(pattern_hosts="collector", roles=self._roles) as p:
        # Stop InfluxDB so the data files are consistent while archiving.
        p.docker_container(
            display_name="Stopping InfluxDB",
            name="influxdb",
            state="stopped",
        )
        p.archive(
            display_name="Archiving the data volume",
            path="/influxdb-data",
            dest="/influxdb-data.tar.gz",
        )
        p.fetch(
            display_name="Fetching the data volume",
            src="/influxdb-data.tar.gz",
            dest=str(Path(dest_dir, "influxdb-data.tar.gz")),
            flat=True,
        )
        p.shell("docker start influxdb", display_name="Restarting InfluxDB")
boxes.print() longestTimeOfLongest = boxes.getMaxTime() logging.debug(f"Longest possible task takes {longestTimeOfLongest}ms.") seed(SEED) provider = G5k(conf) roles, networks = provider.init() roles = discover_networks(roles, networks) priors = [__python3__, __default_python3__, __docker__] with play_on(pattern_hosts='all', roles=roles, priors=priors) as p: p.pip(display_name='Installing python-docker…', name='docker') ## #A deploy jaeger, for now, we set up with all in one with play_on(pattern_hosts='collector', roles=roles) as p: p.docker_container( display_name=f'Installing jaeger…', name='jaeger', image='jaegertracing/all-in-one:1.17', detach=True, network_mode='host', state='started', recreate=True, published_ports=['5775:5775/udp', '6831:6831/udp', '6832:6832/udp', '5778:5778', '16686:16686', '14268:14268',
roles = discover_networks(roles, networks) m = Energy(sensors=roles['sensored'], mongos=roles['collector'], formulas=roles['collector'], influxdbs=roles['collector'], grafana=roles['collector'], monitor={ 'dram': False, 'cores': True }) m.deploy() priors = [__python3__, __default_python3__, __docker__] with play_on(pattern_hosts='all', roles=roles, priors=priors) as p: p.pip(display_name='Installing python-docker…', name='docker') ## #A deploy jaeger, for now, we set up with all in one with play_on(pattern_hosts='collector', roles=roles) as p: p.docker_container(display_name=f'Installing jaeger…', name='jaeger', image='jaegertracing/all-in-one:1.17', detach=True, network_mode='host', state='started', recreate=True, published_ports=[ '5775:5775/udp', '6831:6831/udp', '6832:6832/udp', '5778:5778', '16686:16686', '14268:14268', '14250:14250', '9411:9411'
formulas=roles['control'], influxdbs=roles['control'], grafana=roles['control'], monitor={ 'dram': True, 'cores': True }) m.deploy() ui_address = roles['control'][0].extra['my_network_ip'] print("Grafana is available at http://%s:3000" % ui_address) print("user=admin, password=admin") ## #B deploy a service with play_on(pattern_hosts='compute', roles=roles) as p: p.docker_image( #source='load', # Added in ansible 2.8 name='meow-world', tag='latest', load_path='/home/brnedelec/meow-world_latest.tar' ) ## (TODO) automatic or configurable with play_on(pattern_hosts='compute', roles=roles, extra_vars={ 'ansible_hostname_to_cpu': m.hostname_to_cpu, 'ansible_hostname_to_influxdb': m.hostname_to_influxdb }) as p: p.docker_container( display_name='Installing meow-world service…', name='meow-world-{{inventory_hostname_short}}',
def distem_bootstrap(roles, path_sshkeys):
    """Bootstrap distem on G5k nodes

    Args :
        roles (dict): physical machines to start containers on.
        path_sshkeys (dict): ssh keys paths

    Return :
        distem (class): distem client
    """
    coordinator = _get_all_hosts(roles)[0]
    distem = d.Distem(serveraddr=coordinator)
    # Probe for an already-running distem server.
    got_pnodes = False
    try:
        got_pnodes = distem.pnodes_info()
    except Exception:
        logger.error("No pnodes detected - Not critical error")
    with play_on(roles=roles) as p:
        # Distribute the ssh keys and authorize them on every node.
        p.copy(dest="/root/.ssh/id_rsa", src=path_sshkeys["private"], mode="600")
        p.copy(dest="/root/.ssh/id_rsa.pub", src=path_sshkeys["public"], mode="600")
        p.lineinfile(path="/root/.ssh/authorized_keys",
                     line=open(path_sshkeys["public"]).read())
        # Install distem from its (insecure) debian repository, then drop
        # the repository again once the package is in place.
        repo = "deb [allow_insecure=yes] http://distem.gforge.inria.fr/deb-stretch ./"
        p.apt_repository(repo=repo, update_cache="no", state="present")
        p.shell("apt-get update")
        p.apt(
            name="distem",
            state="present",
            allow_unauthenticated="yes",
            force="yes",
            force_apt_get="yes",
        )
        # tmux is needed to keep distemd alive (see below)
        p.apt(name="tmux", state="present")
        p.apt_repository(repo=repo, update_cache="no", state="absent")
    if got_pnodes:
        distem.pnodes_quit()
    with play_on(roles=roles) as p:
        # Kill any leftover distemd process on every node, then wait for
        # both distem ports to be free.
        pipeline = [
            "kill -9 `ps aux|grep \"distemd\"",
            "grep -v grep",
            "sed \"s/ \\{1,\\}/ /g\"",
            "cut -f 2 -d\" \"`",
        ]
        p.shell("|".join(pipeline) + "|| true")
        p.wait_for(state="stopped", port=4567)
        p.wait_for(state="stopped", port=4568)
    with play_on(pattern_hosts=coordinator, roles=roles) as p:
        p.file(state="directory", dest=PATH_DISTEMD_LOGS)
        # nohup starts distem but 4568 is unreachable (and init-pnodes
        # returns nil). The only working approach found is to start distem
        # inside a tmux session... which is weird, because distem-bootstrap
        # seems to start distemd correctly over SSH without any trouble.
        p.shell('tmux new-session -d "exec distemd --verbose -d"')
        p.wait_for(state="started", port=4567, timeout=10)
        p.wait_for(state="started", port=4568, timeout=10)
    distem.pnode_init(_get_all_hosts(roles))
    return distem
def deploy(self):
    """Deploy the monitoring stack.

    Installs docker and python-docker on every host, then starts
    InfluxDB on the collector, Telegraf on the agents and Grafana on
    the UI host. No-op when no collector is configured.
    """
    if self.collector is None:
        return
    # Some requirements
    with play_on(pattern_hosts="all", roles=self._roles) as p:
        p.apt(
            display_name="Installing python-setuptools",
            name="python-pip",
            state="present",
            update_cache=True,
        )
        p.pip(display_name="Installing python-docker", name="docker")
        p.shell(
            "which docker || (curl -sSL https://get.docker.com/ | sh)",
            display_name="Installing docker",
        )
    # Deploy the collector
    with play_on(pattern_hosts="collector", roles=self._roles) as p:
        p.docker_container(
            display_name="Installing",
            name="influxdb",
            image="influxdb",
            detach=True,
            network_mode="host",
            state="started",
            volumes=["/influxdb-data:/var/lib/influxdb"],
        )
        p.wait_for(
            display_name="Waiting for InfluxDB to be ready",
            host="localhost",
            port="8086",
            state="started",
            delay=2,
            timeout=120,
        )
    # Deploy the agents
    _path = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
    if self.network is not None:
        # This assumes that `discover_network` has been run before
        collector_address = self.collector[0].extra[self.network + "_ip"]
    else:
        collector_address = self.collector[0].address
    extra_vars = {"collector_address": collector_address}
    with play_on(pattern_hosts="agent", roles=self._roles,
                 extra_vars=extra_vars) as p:
        p.template(
            display_name="Generating the configuration file",
            src=os.path.join(_path, self.agent_conf),
            dest="/telegraf.conf",
        )
        volumes = [
            "/telegraf.conf:/etc/telegraf/telegraf.conf",
            # BUG FIX: was "sys:/rootfs/sys:ro", which mounts a *named
            # docker volume* called "sys" instead of bind-mounting the
            # host's /sys — HOST_SYS below expects the real /sys to be
            # visible at /rootfs/sys.
            "/sys:/rootfs/sys:ro",
            "/proc:/rootfs/proc:ro",
            "/var/run/docker.sock:/var/run/docker.sock:ro",
        ]
        p.docker_container(
            display_name="Installing Telegraf",
            name="telegraf",
            image="telegraf",
            detach=True,
            state="started",
            network_mode="host",
            volumes=volumes,
            env={
                "HOST_PROC": "/rootfs/proc",
                "HOST_SYS": "/rootfs/sys",
            },
        )
    # Deploy the UI
    with play_on(pattern_hosts="ui", roles=self._roles) as p:
        p.docker_container(
            display_name="Installing Grafana",
            name="grafana",
            image="grafana/grafana",
            detach=True,
            network_mode="host",
            state="started",
        )
        p.wait_for(
            display_name="Waiting for grafana to be ready",
            host="localhost",
            port=3000,
            state="started",
            delay=2,
            timeout=120,
        )
def deploy(self):
    """Deploy the energy monitoring stack.

    Pipeline: HWPC sensors push raw reports to MongoDB; SmartWatts
    formulas turn them into power reports stored in InfluxDB; an
    optional Grafana displays them.

    Fixes over the previous revision:
      * the SmartWatts loop advanced its index with ``++i`` — in Python a
        double unary plus, i.e. a no-op — so every formula/influxdb was
        scheduled on index 0; ``enumerate`` is used instead;
      * ``self.monitor`` entries are read with ``dict.get`` so a missing
        key (callers commonly omit 'gpu') no longer raises ``KeyError``.
    """
    ## #0A Install requirements everywhere.
    with play_on(pattern_hosts='all', roles=self._roles,
                 priors=self.priors) as p:
        p.pip(display_name='Installing python-docker…', name='docker')

    ## #0B Retrieve cpu data from each host, then sanity-check the sizing.
    self._get_cpus()
    logging.debug(self.cpuname_to_cpu)
    logging.debug(self.hostname_to_cpu)
    if (len(self.mongos) > len(self.cpuname_to_cpu)
            or len(self.formulas) > len(self.cpuname_to_cpu)
            or len(self.influxdbs) > len(self.formulas)):
        logging.warning(
            'There might be an issue with the setup: too many collectors '
            '(stack dbs and analysis), (or) not enough cpu types. '
            'It may waste resources.')

    ## #0C Clean everything to make sure that interdependency conditions
    ## are met (needed since restarting without it led to early crashes
    ## of the SmartWatts formula…).
    self.destroy()

    ## #1 Deploy the MongoDB collectors.
    with play_on(pattern_hosts='mongos', roles=self._roles) as p:
        p.docker_container(
            display_name='Installing mongodb…',
            name='mongodb',
            image=f'mongo:{MONGODB_VERSION}',
            detach=True,
            state='started',
            recreate=True,
            exposed_ports=['27017'],
            published_ports=[f'{MONGODB_PORT}:27017'],
            volumes='/tmp/:/data/db',
        )
        p.wait_for(
            display_name='Waiting for MongoDB to be ready…',
            host='localhost',
            port='27017',
            state='started',
            delay=2,
            timeout=120,
        )

    ## #2 Deploy the energy sensors. Each host reports to the MongoDB and
    ## InfluxDB chosen by its cpu type (round-robin over available ones).
    cpunames = list(self.cpuname_to_cpu.keys())
    for hostname, cpu in self.hostname_to_cpu.items():
        mongo_index = cpunames.index(cpu.cpu_name) % len(self.mongos)
        influxdb_index = cpunames.index(cpu.cpu_name) % len(self.influxdbs)
        self.hostname_to_mongo[hostname] = self._get_address(
            self._roles['mongos'][mongo_index])
        self.hostname_to_influxdb[hostname] = self._get_address(
            self._roles['influxdbs'][influxdb_index])
    with play_on(pattern_hosts='sensors', roles=self._roles,
                 extra_vars={
                     'ansible_hostname_to_mongo': self.hostname_to_mongo,
                     'ansible_hostname_to_cpu': self.hostname_to_cpu}) as p:
        # (TODO) check without volumes; the sensor potentially uses them to
        # read about events and containers... maybe they are mandatory then.
        volumes = [
            '/sys:/sys',
            '/var/lib/docker/containers:/var/lib/docker/containers:ro',
            '/tmp/powerapi-sensor-reporting:/reporting',
        ]
        command = [
            '-n sensor-{{inventory_hostname_short}}',
            '-r mongodb -U mongodb://{{ansible_hostname_to_mongo[inventory_hostname]}}:27017',
            f'-D {SENSORS_OUTPUT_DB_NAME}',
            '-C col_{{ansible_hostname_to_cpu[inventory_hostname].cpu_shortname}}',
            '-s rapl -o',
        ]
        ## RAPL: Running Average Power Limit (needs a privileged container)
        ## (TODO) double check if these options are available at hardware/OS level
        if self.monitor.get('cores'):
            command.append('-e RAPL_ENERGY_PKG')    # all cores + LLC cache
        if self.monitor.get('dram'):
            command.append('-e RAPL_ENERGY_DRAM')   # power consumption of DRAM
        if self.monitor.get('cores'):
            command.append('-e RAPL_ENERGY_CORES')  # all cores on socket
        if self.monitor.get('gpu'):
            command.append('-e RAPL_ENERGY_GPU')    # power consumption of GPU
        command.extend([
            '-s msr -e TSC -e APERF -e MPERF',
            '-c core',  ## CORE
            # (TODO) this part does not seem to work properly
            # (TODO) check possible event names depending on cpu architecture
            # '-e "CPU_CLK_THREAD_UNHALTED:REF_P"',     ## nehalem & westmere
            # '-e "CPU_CLK_THREAD_UNHALTED:THREAD_P"',  ## nehalem & westmere
            # '-e "CPU_CLK_THREAD_UNHALTED.REF_XCLK"',  # sandy -> broadwell archi, not scaled!
            # '-e "CPU_CLK_THREAD_UNHALTED.REF_XCLK"',  # skylake and newer, must be scale by x4 base ratio.
            '-e CPU_CLK_UNHALTED',
            '-e LLC_MISSES -e INSTRUCTIONS_RETIRED',
        ])
        p.docker_container(
            display_name='Installing PowerAPI sensors…',
            name='powerapi-sensor',
            image=f'powerapi/hwpc-sensor:{HWPCSENSOR_VERSION}',
            detach=True,
            state='started',
            recreate=True,
            network_mode='host',
            privileged=True,
            volumes=volumes,
            command=command,
        )

    ## #3 Deploy InfluxDB: the output of SmartWatts and the input of the
    ## optional Grafana.
    with play_on(pattern_hosts='influxdbs', roles=self._roles) as p:
        p.docker_container(
            display_name='Installing InfluxDB…',
            name='influxdb',
            image=f'influxdb:{INFLUXDB_VERSION}',
            detach=True,
            state='started',
            recreate=True,
            exposed_ports='8086',
            published_ports=f'{INFLUXDB_PORT}:8086',
        )
        p.wait_for(
            display_name='Waiting for InfluxDB to be ready…',
            host='localhost',
            port='8086',
            state='started',
            delay=2,
            timeout=120,
        )

    ## #4 Deploy SmartWatts (there may be multiple SmartWatts per machine).
    ## (TODO) start multiple formulas in the same formula container?
    ## (TODO) ansiblify instead of sequentially pushing commands
    ## BUG FIX: enumerate replaces the broken `++i` counter (see docstring).
    for i, (cpu_name, cpu) in enumerate(self.cpuname_to_cpu.items()):
        mongo_index = cpunames.index(cpu.cpu_name) % len(self.mongos)
        mongo_addr = self._get_address(self._roles['mongos'][mongo_index])
        influxdbs_addr = self._get_address(
            self.influxdbs[i % len(self.influxdbs)])
        smartwatts_name = self._get_smartwatts_name(cpu)
        formula_host = self._get_address(self.formulas[i % len(self.formulas)])
        with play_on(pattern_hosts=formula_host, roles=self._roles) as p:
            command = [
                '-s',
                '--input mongodb --model HWPCReport',
                f'--uri mongodb://{mongo_addr}:{MONGODB_PORT}',
                f'-d {SENSORS_OUTPUT_DB_NAME} -c col_{cpu.cpu_shortname}',
                # f"--output influxdb --name hwpc --model HPWCReport",
                # f"--uri {influxdbs_addr} --port {INFLUXDB_PORT} --db hwpc_report",
                f'--output influxdb --name power_{cpu.cpu_shortname} --model PowerReport',
                f'--uri {influxdbs_addr} --port {INFLUXDB_PORT} --db power_{cpu.cpu_shortname}',
                # vvv Formula report does not have to_influxdb (yet?)
                # f"--output influxdb --name formula --model FormulaReport",
                # f"--uri {influxdbs_addr} --port {INFLUXDB_PORT} --db formula_report",
                '--formula smartwatts',
                f'--cpu-ratio-base {cpu.cpu_nom}',
                f'--cpu-ratio-min {cpu.cpu_min}',
                f'--cpu-ratio-max {cpu.cpu_max}',
                f'--cpu-error-threshold {SMARTWATTS_CPU_ERROR_THRESHOLD}',
                f'--dram-error-threshold {SMARTWATTS_DRAM_ERROR_THRESHOLD}',
            ]
            if not self.monitor.get('cores'):
                command.append('--disable-cpu-formula')
            if not self.monitor.get('dram'):
                command.append('--disable-dram-formula')
            p.docker_container(
                display_name='Installing smartwatts formula…',
                name=smartwatts_name,
                image=f'powerapi/smartwatts-formula:{SMARTWATTS_VERSION}',
                detach=True,
                network_mode='host',
                recreate=True,
                command=command,
            )

    ## #5 Deploy the optional Grafana server.
    if self.grafana is None:
        return
    ## #A Prepare the dashboard: one panel target per cpu type.
    with open('grafana_dashboard.json', 'r') as f:
        dashboard_json = json.load(f)
    panel_targets = []
    for cpu_name, cpu in self.cpuname_to_cpu.items():
        panel_targets.append({
            'datasource': f'power-{cpu_name}',
            'groupBy': [{'params': ['$__interval'], 'type': 'time'},
                        {'params': ['target'], 'type': 'tag'}],
            'measurement': 'power_consumption',
            'orderByTime': 'ASC',
            'policy': 'default',
            'refId': f'{cpu.cpu_shortname}',
            'resultFormat': 'time_series',
            'select': [[{'params': ['power'], 'type': 'field'},
                        {'params': [], 'type': 'mean'}]],
            'tags': [{'key': 'target', 'operator': '!=', 'value': 'global'},
                     {'key': 'target', 'operator': '!=',
                      'value': 'powerapi-sensor'},
                     {'key': 'target', 'operator': '!=', 'value': 'rapl'}],
        })
    dashboard_json['dashboard']['panels'][0]['targets'] = panel_targets
    with play_on(pattern_hosts='grafana', roles=self._roles) as p:
        p.docker_container(
            display_name='Installing Grafana…',
            name='grafana',
            image=f'grafana/grafana:{GRAFANA_VERSION}',
            detach=True,
            recreate=True,
            state='started',
            network_mode='host',  # not very clean "host"
        )
        p.wait_for(
            display_name='Waiting for Grafana to be ready…',
            host='localhost',
            port='3000',
            state='started',
            delay=2,
            timeout=120,
        )
        ## #B Add the datasources and fill the dashboard.
        for i, (cpu_name, cpu) in enumerate(self.cpuname_to_cpu.items()):
            influxdbs_addr = self._get_address(
                self.influxdbs[i % len(self.influxdbs)])
            p.uri(
                display_name='Add InfluxDB power reports in Grafana…',
                url=f'http://localhost:{GRAFANA_PORT}/api/datasources',
                user='******',
                password='******',
                force_basic_auth=True,
                body_format='json',
                method='POST',
                status_code=[200, 409],  # 409 means: already added
                body=json.dumps({'name': f'power-{cpu_name}',
                                 'type': 'influxdb',
                                 'url': f'http://{influxdbs_addr}:{INFLUXDB_PORT}',
                                 'access': 'proxy',
                                 'database': f'power_{cpu.cpu_shortname}',
                                 'isDefault': True}),
            )
        p.uri(
            display_name='Create a dashboard with all containers…',
            url='http://localhost:3000/api/dashboards/import',
            user='******',
            password='******',
            force_basic_auth=True,
            body_format='json',
            method='POST',
            status_code=[200],
            body=json.dumps(dashboard_json),
        )
cluster="paravance", number=1, flavour="large")\ .finalize() provider = Distem(conf) roles, networks = provider.init() print(roles) print(networks) gateway = networks[0]['gateway'] print("Gateway : %s" % gateway) discover_networks(roles, networks) with play_on(roles=roles, gather_facts=False) as p: # We first need internet connectivity # Netmask for a subnet in g5k is a /14 netmask p.shell("ifconfig if0 $(hostname -I) netmask 255.252.0.0") p.shell("route add default gw %s dev if0" % gateway) # Experimentation logic starts here with play_on(roles=roles) as p: # flent requires python3, so we default python to python3 p.apt_repository( repo="deb http://deb.debian.org/debian stretch main contrib non-free", state="present") p.apt(name=["flent", "netperf", "python3-setuptools"], state="present") with play_on(pattern_hosts="server", roles=roles) as p: p.shell("nohup netperf &")
image="/grid5000/virt-images/debian9-x64-std-2019040916.qcow2", gateway="access.grid5000.fr", gateway_user="******")\ .add_machine(roles=["server"], cluster="grisou", number=1)\ .add_machine(roles=["client"], cluster="grisou", number=1)\ .finalize() provider = VMonG5k(conf) roles, networks = provider.init() discover_networks(roles, networks) with play_on("all", roles=roles) as p: # flent requires python3, so we default python to python3 p.shell( "update-alternatives --install /usr/bin/python python /usr/bin/python3 1" ) p.apt_repository( repo="deb http://deb.debian.org/debian stretch main contrib non-free", state="present") p.apt(name=["flent", "netperf", "python3-setuptools"], state="present") with play_on("server", roles=roles) as p: p.shell("nohup netperf &") with play_on("client", roles=roles) as p: p.shell("flent rrul -p all_scaled " + "-l 60 " + "-H {{ hostvars[groups['server'][0]].inventory_hostname }} " +
}) e.deploy() ## #B check if everything has deployed well local_sensor_logs = './_tmp_enos_/sensor-logs' remote_sensor_logs = 'tmp/sensor-logs' ## #1 remove outdated data localDirLogs = Path(f"{local_sensor_logs}/{roles['calibrate'][0].address}") if localDirLogs.exists() and localDirLogs.is_dir(): shutil.rmtree(localDirLogs) ## #2 retrieve new data with play_on(pattern_hosts='calibrate', roles=roles) as p: p.shell( f'sudo docker container logs powerapi-sensor > /{remote_sensor_logs}' ) p.fetch( display_name='Retrieving the logs of powerapi-sensor', src=f'/{remote_sensor_logs}', dest=f'{local_sensor_logs}', flat=False, ) pathFileLogs = localDirLogs / remote_sensor_logs with pathFileLogs.open('r') as f: logs = f.read() print(logs)
"networks": [ { "roles": ["local"], "start": "172.17.0.0", "end": "172.17.255.255", "cidr": "172.17.0.0/16", "gateway": "172.17.0.1", "dns": "172.17.0.1", } ], } } inventory = os.path.join(os.getcwd(), "hosts") conf = Configuration.from_dictionnary(provider_conf) provider = Static(conf) roles, networks = provider.init() with play_on(roles=roles) as p: p.shell("date > /tmp/date") with open("/tmp/date") as f: print(f.readlines()) # async with play_on(pattern_hosts="all", roles=roles) as p: for i in range(10): p.shell("sleep 10", async=100, poll=0)
image="/grid5000/virt-images/debian9-x64-std-2019040916.qcow2", gateway="access.grid5000.fr", gateway_user="******")\ .add_machine(roles=["server"], cluster="grisou", number=1)\ .add_machine(roles=["client"], cluster="grisou", number=1)\ .finalize() provider = VMonG5k(conf) roles, networks = provider.init() discover_networks(roles, networks) with play_on(roles=roles) as p: # flent requires python3, so we default python to python3 p.shell( "update-alternatives --install /usr/bin/python python /usr/bin/python3 1" ) p.apt_repository( repo="deb http://deb.debian.org/debian stretch main contrib non-free", state="present") p.apt(name=["flent", "netperf", "python3-setuptools"], state="present") with play_on(pattern_hosts="server", roles=roles) as p: p.shell("nohup netperf &") with play_on(pattern_hosts="client", roles=roles) as p: p.shell("flent rrul -p all_scaled " + "-l 60 " + "-H {{ hostvars[groups['server'][0]].inventory_hostname }} " +
"machines": [{ "roles": ["control"], "address": "localhost", "alias": "test_machine", "extra": { "ansible_connection": "local" } }], "networks": [{ "roles": ["local"], "start": "172.17.0.0", "end": "172.17.255.255", "cidr": "172.17.0.0/16", "gateway": "172.17.0.1", "dns": "172.17.0.1", }] } } inventory = os.path.join(os.getcwd(), "hosts") conf = Configuration.from_dictionnary(provider_conf) provider = Static(conf) roles, networks = provider.init() with play_on("all", roles=roles) as p: p.shell("date > /tmp/date") with open("/tmp/date") as f: print(f.readlines())
def monitor(rs: Roles, nets: List[Network]):
    '''Fig4. Reusable function for monitoring.

    Collect metrics on `monitored` hosts. Store and see metrics on
    `aggregator` hosts. Use the `monitor` network to send metrics.
    '''
    # Discover networks to use net info in telegraf.conf.j2
    discover_networks(rs, nets)

    # Install Docker everywhere
    with play_on(pattern_hosts="all", roles=rs) as p:
        p.shell(
            "which docker || (curl -sSL https://get.docker.com/ | sh)",
            display_name="Install docker")
        p.apt(
            display_name="Install python-docker (for ansible docker_container)",
            name="python-docker",
            update_cache=True)

    # Install Telegraf on monitored machines
    with play_on(pattern_hosts="monitored", roles=rs,
                 gather_facts="all") as p:
        p.template(
            display_name="Generating Telegraf conf",
            src="misc/telegraf.conf.j2",
            dest="/root/telegraf.conf")
        p.docker_container(
            display_name="Installing Telegraf",
            name="telegraf",
            image="telegraf:1.12-alpine",
            detach=True,
            network_mode="host",
            state="started",
            volumes=['/root/telegraf.conf:/etc/telegraf/telegraf.conf'])

    # Install InfluxDB and Grafana on `aggregator` machines
    with play_on(pattern_hosts="aggregator", roles=rs) as p:
        p.docker_container(
            display_name="Install InfluxDB",
            name="influxdb",
            image="influxdb:1.7-alpine",
            detach=True,
            state="started",
            network_mode="host",
            exposed_ports="8086:8086")
        p.wait_for(
            display_name="Waiting for InfluxDB to be ready",
            host="localhost",
            port="8086",
            state="started",
            delay=2,
            timeout=120,)
        p.docker_container(
            display_name="Install Grafana",
            name="grafana",
            image="grafana/grafana:5.4.3",
            detach=True,
            state="started",
            network_mode="host",
            exposed_ports="3000:3000")
        p.wait_for(
            display_name="Waiting for Grafana to be ready",
            host="localhost",
            port="3000",
            state="started",
            delay=2,
            timeout=120,)
        p.uri(
            display_name="Add InfluxDB in Grafana",
            url="http://localhost:3000/api/datasources",
            user="******",
            password="******",
            force_basic_auth=True,
            body_format="json",
            method="POST",
            status_code=[200, 409],  # 409 when the datasource already exists
            body=json.dumps({
                "name": "telegraf",
                "type": "influxdb",
                "url": "http://localhost:8086",
                "access": "proxy",
                "database": "telegraf",
                "isDefault": True}))
        p.uri(
            display_name="Import dashboard in Grafana",
            url="http://localhost:3000/api/dashboards/import",
            user="******",
            password="******",
            force_basic_auth=True,
            body_format="json",
            method="POST",
            status_code=[200],
            src="misc/grafana-dashboard.json")

    # Display UI URLs to view metrics
    ui_urls = [f'http://{h.extra["monitor_ip"]}:3000'
               for h in rs['aggregator']]
    LOG.info(f'View UI on {ui_urls}')
    LOG.info('Connect with `admin` as login and password, '
             'then skip the change password, '
             'and finally select `Host Dashboard`.')