def get_tcpdump_command():
    """Build the tcpdump command used to sample container traffic.

    Excludes every port in FILTER_PORTS (dAdvisor's own traffic), limits
    the capture to TRAFFIC_SAMPLE packets, and keeps only TCP packets
    with a non-empty payload (the ip/tcp header-length arithmetic in the
    final filter expression).

    :return: the command as a list of arguments, suitable for subprocess
    """
    # One flat token stream: ['not', 'port', '<p>', 'and', ...] per filtered port.
    filter_args = [tok for port in FILTER_PORTS
                   for tok in ('not', 'port', str(port), 'and')]
    command = ['tcpdump', '-c', str(TRAFFIC_SAMPLE), '-i', 'any', '-nn',
               'ip', 'and', '-l', '-t'] + filter_args + \
              ['tcp', 'and',
               '(((ip[2:2] - ((ip[0]&0xf)<<2)) - ((tcp[12]&0xf0)>>2)) != 0)']
    log.info('Running command: {}'.format(' '.join(command)))
    return command
async def compute_util_and_waste(self):
    """Collect per-container CPU/memory utilization, normalize it, derive
    the waste per container, and add everything to the Prometheus counters.

    Returns early (after logging) if collection fails or yields no data.
    """
    info = await get_container_utilization()
    try:
        # Keys look like '/docker/<id>'; strip the prefix to get the container id.
        containers = [docker_id[len('/docker/'):] for docker_id in info.keys()]
        util_list = [self.get_util(value) for value in info.values()]
        self.filter_dadvisor(containers, util_list)
    except Exception as e:
        log.error(e)
        return
    if not util_list:
        return
    # BUGFIX: zip(*...) yields tuples, but scale_list mutates its argument
    # in place — convert to lists so scaling does not raise TypeError.
    cpu_util_list, mem_util_list = (list(t) for t in zip(*util_list))
    self.scale_list(cpu_util_list)
    self.scale_list(mem_util_list)
    cpu_waste_list = self.get_waste(cpu_util_list)
    mem_waste_list = self.get_waste(mem_util_list)
    log.info(cpu_util_list)
    # FACTOR converts the normalized fractions into the exported unit.
    for i, container in enumerate(containers):
        self.cpu_util_container_sum.labels(src=container, src_host=IP) \
            .inc(cpu_util_list[i] * FACTOR)
        self.mem_util_container_sum.labels(src=container, src_host=IP) \
            .inc(mem_util_list[i] * FACTOR)
        self.cpu_waste_container_sum.labels(src=container, src_host=IP) \
            .inc(cpu_waste_list[i] * FACTOR)
        self.mem_waste_container_sum.labels(src=container, src_host=IP) \
            .inc(mem_waste_list[i] * FACTOR)
async def compute_network_usage(self):
    """Read per-container network stats and export the delta since the
    previous sample to the Prometheus network counter.

    The last absolute value per container is remembered in
    ``self.prev_network_container`` so only the increase is added.
    """
    data = await get_container_stats()
    try:
        # Keys look like '/docker/<id>'; keep only the container id part.
        containers = [cid[len('/docker/'):] for cid in data.keys()]
        network_values = [self.get_network(stats) for stats in data.values()]
        self.filter_dadvisor(containers, network_values)
        for idx, container in enumerate(containers):
            prev = self.prev_network_container.get(container, 0)
            log.info(f'Container {container}: {prev}')
            self.prev_network_container[container] = network_values[idx]
            delta = network_values[idx] - prev
            self.network_container_sum.labels(src=container, src_host=IP).inc(delta)
    except Exception as e:
        log.error(e)
async def run(self):
    """Main stats loop: once per SLEEP_TIME seconds collect network usage
    and utilization/waste, compensating for how long collection took.

    Runs until ``self.running`` becomes False; exceptions are logged and
    the loop continues.
    """
    elapsed = 0.0
    while self.running:
        try:
            # Never pass a negative delay when collection overran the interval.
            await asyncio.sleep(max(SLEEP_TIME - elapsed, 0))
            started = datetime.utcnow()
            log.info(f'Sleeping {SLEEP_TIME - elapsed} sec')
            # Execute once per SLEEP_TIME
            await self.compute_network_usage()
            await self.compute_util_and_waste()
            # BUGFIX: use total_seconds(), not .seconds — the latter is only
            # the seconds *component* of the timedelta (truncates fractions
            # and wraps at 24 hours).
            elapsed = (datetime.utcnow() - started).total_seconds()
        except Exception as e:
            log.error(e)
    log.info('StatsCollector stopped')
async def run(self):
    """
    Performs the following two actions:
    - only at initialization: collect static information about the host price
    - continuously (every 30 sec) perform the following actions:
        - find new containers
        - validate own containers (find out ip address and if they're alive)
    :return:
    """
    while self.running:
        try:
            await asyncio.sleep(SLEEP_TIME)
            # Discover newly started containers, then re-check the known ones.
            await self.collect_own_containers()
            await self.validate_own_containers()
        except Exception as err:
            log.error(err)
    log.info('ContainerCollector stopped')
def run(self):
    """Traffic-inspection loop.

    Each iteration:
      1. Runs tcpdump to capture TRAFFIC_SAMPLE packets (takes X seconds).
      2. Hands each parsed dataflow to the analyser and resolves the
         cache by communicating with the other nodes.
      3. Sleeps k*X seconds (bounded by TRAFFIC_SLEEP_MIN/MAX); the
         multiplier extrapolates the sampled traffic over the sleep gap.
    """
    self.check_installation()
    command = self.get_tcpdump_command()
    multiplier = 1
    while self.running:
        start_time = time.time()
        p = subprocess.Popen(command, stdout=subprocess.PIPE)
        # parse results
        for row in iter(p.stdout.readline, b''):
            try:
                dataflow = parse_row(self.container_collector, row.decode('utf-8'))
                # Scale by multiplier to account for the unsampled sleep window.
                dataflow.size = (dataflow.size + HEADER_SIZE) * multiplier
                self.analyser.loop.create_task(self.analyser.analyse_dataflow(dataflow))
            except Exception as e:
                log.error(e)
                log.error('Cannot parse row: {}'.format(row.decode('utf-8').rstrip()))
        # BUGFIX: close the pipe and reap the child, otherwise each
        # iteration leaks a file descriptor and leaves a zombie process.
        p.stdout.close()
        p.wait()
        end_time = time.time()
        elapsed = end_time - start_time
        log.info('Monitoring {} packets in {} sec'.format(TRAFFIC_SAMPLE, elapsed))
        self.analyser.loop.create_task(self.analyser.cache.resolve(self.node_collector))
        # sleep K times the elapsed time, minus the time it took to
        # schedule the cache resolution, clamped to the configured bounds.
        sleep_time = TRAFFIC_K * elapsed - (time.time() - end_time)
        sleep_time = min(max(sleep_time, TRAFFIC_SLEEP_MIN), TRAFFIC_SLEEP_MAX)
        if elapsed != 0:
            multiplier = (sleep_time + elapsed) / elapsed
        else:
            multiplier = 1
        log.info(f'Multiplier: {multiplier}')
        log.info('Sleeping for: {} sec'.format(sleep_time))
        time.sleep(sleep_time)
    log.info('Inspector thread stopped')
def stop(self):
    """Signal the inspector loop to terminate after its current iteration."""
    self.running = False
    log.info('Stopping InspectorThread')
async def run_app(app):
    """Serve the aiohttp *app* on all interfaces at INTERNAL_PORT."""
    app_runner = web.AppRunner(app)
    await app_runner.setup()
    log.info('Running on localhost:{}'.format(INTERNAL_PORT))
    await web.TCPSite(app_runner, '0.0.0.0', INTERNAL_PORT).start()
# Host identity and node role (SUPERNODE vs. regular NODE).
IP = os.environ.get('IP', ip)
IS_SUPER_NODE = os.environ.get('TYPE', 'NODE') == 'SUPERNODE'

# Ports and endpoints of the dAdvisor components.
PROXY_PORT = int(os.environ.get('DADVISOR_PORT', 14100))
INTERNAL_PORT = 14101
PROMETHEUS_PORT = 14102
CADVISOR_URL = 'http://localhost:14104'
PROMETHEUS_URL = f'http://localhost:{PROXY_PORT}/prometheus'
TRACKER = os.environ.get('TRACKER', 'http://35.204.250.252:14100')

# Ports whose traffic must be excluded from the tcpdump capture (dAdvisor's
# own traffic). BUGFIX: strip each entry so values like "22, 80" don't
# produce ' 80' and silently break the tcpdump filter.
FILTER_PORTS = [p.strip() for p in os.environ.get(
    'FILTER_PORTS', f'22,{PROXY_PORT},{INTERNAL_PORT},{PROMETHEUS_PORT}').split(',')]
log.info(f'Filtering internet traffic ports: {FILTER_PORTS}')

# INTERNET TRAFFIC
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
TRAFFIC_SAMPLE = int(os.environ.get('TRAFFIC_SAMPLE', 1000))      # packets per capture
TRAFFIC_K = int(os.environ.get('TRAFFIC_K', 9))                   # sleep multiplier
TRAFFIC_SLEEP_MIN = int(os.environ.get('TRAFFIC_SLEEP_MIN', 1))   # seconds
TRAFFIC_SLEEP_MAX = int(os.environ.get('TRAFFIC_SLEEP_MAX', 150)) # seconds

SLEEP_TIME = int(os.environ.get('SLEEP_TIME', 60))
PREFIX = '/dadvisor'

LOG_LEVEL = os.environ.get('LOG_LEVEL', 'INFO')
log.setLevel(LOG_LEVEL)
# Possible log values: 'CRITICAL', 'FATAL', 'ERROR', 'WARN', 'WARNING', 'INFO', 'DEBUG', 'NOTSET'
def stop(self):
    """Request that the container-collector loop stop."""
    log.info('Stopping containerCollector')
    self.running = False
def scale_list(util_list):
    """Normalize *util_list* in place so its values sum to at most 1.

    When the current sum exceeds 1, every entry is divided by that sum;
    otherwise the list is left untouched. Mutates in place, returns None.
    """
    total = sum(util_list)
    if total > 1:
        log.info(f'Scaling list: {util_list}')
        util_list[:] = [value / total for value in util_list]
def remove_node(loop, node):
    """Fire-and-forget request asking the tracker to drop *node* as a peer."""
    log.info(f'Removing peer: {node}')
    request = _send_post(f'{TRACKER}/root/remove', data=node)
    loop.create_task(request)
def register_node(loop, node):
    """Fire-and-forget request announcing *node* to the tracker."""
    log.info(f'Registering peer: {node}')
    request = _send_post(f'{TRACKER}/root/add', data=node)
    loop.create_task(request)