def main(args):
    """Strip leftover ``hostname UNLIKE <host>`` constraints from Marathon apps.

    ``args.hosts`` is a comma separated host list (surrounding quotes are
    dropped), ``args.url`` is the Marathon endpoint and ``args.force`` is
    forwarded unchanged to ``unmigrate_tasks``.
    """
    migration_hosts = args.hosts.replace('"', '').replace('\'', '').split(',')
    marathon_client = MarathonClient(args.url)

    # Get the running marathon application dictionary with constraints
    all_apps = utils.dict_by_key_and_value(lambda x: x.id,
                                           lambda y: y.constraints,
                                           marathon_client.list_apps())
    print(">>> All Running Applications: ")
    # list(): a Python 3 dict view is not JSON serializable.
    print(json.dumps(list(all_apps.keys()), sort_keys=True, indent=4,
                     separators=(',', ': ')))

    # Constraints to remove. A real list (not a lazy map object) because it is
    # iterated below and then handed to unmigrate_tasks again.
    sentinels = [MarathonConstraint('hostname', 'UNLIKE', host)
                 for host in migration_hosts]

    # Find all apps with a leftover constraint
    filtered_apps = {}
    for sentinel in sentinels:
        for app_id in all_apps:
            if sentinel in all_apps[app_id]:
                print(">>> Adding app to filtered list: %s" % (app_id))
                filtered_apps[app_id] = all_apps[app_id]

    # Tasks unmigration
    unmigrate_tasks(marathon_client, filtered_apps, sentinels, args.force)
def clean_deploy_ids(self):
    """Print every entry of ``self.deploy_ids`` that is not a Marathon app id.

    Only reports; nothing is removed from ``self.deploy_ids``.
    """
    marathon_client = MarathonClient('http://' + str(marathon_host) + ':' + str(marathon_port))
    # Set membership keeps the check O(1) per deploy id.
    app_ids = set(app.id for app in marathon_client.list_apps())
    for deploy_id in self.deploy_ids:
        if deploy_id not in app_ids:
            print('deploy_id is not in app id! ' + str(deploy_id))
def get_marathon_app_id(self):
    """Return the Marathon app id equal to ``self.encode_marathon_id``, else None.

    ``self.encode_marathon_id`` is read as an attribute (it is not called).
    """
    client = MarathonClient('http://{}:{}'.format(marathon_host, marathon_port))
    running_apps = client.list_apps()
    wanted_id = self.encode_marathon_id
    matches = (candidate.id for candidate in running_apps
               if candidate.id == wanted_id)
    return next(matches, None)
def list_app(request):
    """Render the app list page with all Marathon apps ordered by id.

    Each app gets a ``tag_id`` attribute: its id with "/" replaced by "__".
    """
    marathon_url = 'http://{}:{}'.format(settings.MARATHON['host'],
                                         settings.MARATHON['port'])
    client = MarathonClient(marathon_url)
    ordered_apps = sorted(client.list_apps(), key=lambda item: item.id)
    for item in ordered_apps:
        item.tag_id = item.id.replace("/","__")
    return render(request, 'marathon_mgmt/list_app.html',
                  {'apps': ordered_apps})
def is_deployed(self):
    """Return True when ``self.encode_marathon_id`` occurs inside any app id.

    The check is a substring test against every id returned by Marathon.
    """
    client = MarathonClient('http://{}:{}'.format(marathon_host, marathon_port))
    running_apps = client.list_apps()
    needle = self.encode_marathon_id
    return any(needle in candidate.id for candidate in running_apps)
def get_hosts_dict(self):
    """Group the tasks of every app by the host they run on.

    Returns a dict mapping ``task.host`` to the list of tasks on that host.
    One ``get_app`` call is issued per app to obtain its tasks.
    """
    tasks_by_host = {}
    for listed_app in MarathonClient.list_apps(self):
        detailed_app = MarathonClient.get_app(self, listed_app.id)
        for running_task in detailed_app.tasks:
            tasks_by_host.setdefault(running_task.host, []).append(running_task)
    return tasks_by_host
def get_hosts_dict(self):
    """Return ``{host: [task, ...]}`` built from the tasks of all apps.

    Each app is fetched individually with ``get_app`` to read its tasks.
    """
    grouped = {}
    for summary in MarathonClient.list_apps(self):
        for task in MarathonClient.get_app(self, summary.id).tasks:
            bucket = grouped.get(task.host)
            if bucket is None:
                bucket = []
                grouped[task.host] = bucket
            bucket.append(task)
    return grouped
def list_app(request):
    """Show every Marathon app, sorted by id, on the app list page.

    ``tag_id`` (the id with "/" turned into "__") is attached to each app
    before the template is rendered.
    """
    endpoint = 'http://{}:{}'.format(settings.MARATHON['host'],
                                     settings.MARATHON['port'])
    apps = sorted(MarathonClient(endpoint).list_apps(),
                  key=lambda entry: entry.id)
    for entry in apps:
        entry.tag_id = entry.id.replace("/", "__")
    context = {'apps': apps}
    return render(request, 'marathon_mgmt/list_app.html', context)
def get_deployed_labeled_group_ids(self, labels):
    """Return the ids of apps whose decoded id matches ``labels`` and ``self.name``.

    Every app id is passed through ``decode_marathon_id``; an app is kept when
    the decoded 'labels' equal ``labels`` and the decoded 'service' equals
    ``self.name``.
    """
    client = MarathonClient('http://{}:{}'.format(marathon_host, marathon_port))
    matching_ids = []
    for candidate in client.list_apps():
        info = decode_marathon_id(candidate.id)
        if not (labels == info['labels'] and self.name == info['service']):
            continue
        matching_ids.append(candidate.id)
    return matching_ids
def dashboard(request):
    """Render the dashboard with template, app, job and watcher counts.

    Counts come from the ``Template`` model, Marathon (``list_apps``),
    Chronos (``list``) and ``settings.WATCHER_THREADS``.
    """
    data = {}
    data['total_template'] = Template.objects.count()
    mc = MarathonClient('http://{}:{}'.format(settings.MARATHON['host'], settings.MARATHON['port']))
    data['total_app'] = len(mc.list_apps())
    cclient = chronos.connect('{}:{}'.format(settings.CHRONOS['host'], settings.CHRONOS['port']))
    # Query Chronos once; the job list was previously fetched twice.
    data['total_job'] = len(cclient.list())
    data['total_watcher'] = len(settings.WATCHER_THREADS)
    return render(request, 'dashboard/dashboard.html', data)
def get_apps_json_config(self):
    """Return a JSON string of all app definitions without task state.

    The result is an object keyed by the app's position in ``list_apps``;
    the 'tasks', 'tasksRunning' and 'tasksStaged' entries are removed.
    """
    configs = {}
    for position, app in enumerate(MarathonClient.list_apps(self)):
        definition = json.loads(app.to_json())
        for runtime_key in ('tasks', 'tasksRunning', 'tasksStaged'):
            definition.pop(runtime_key, None)
        configs[position] = definition
    return json.dumps(configs)
def get_apps_json_config(self):
    """Serialize every app definition, minus runtime task fields, to JSON.

    Keys of the resulting object are the running index of each app; the
    fields 'tasks', 'tasksRunning' and 'tasksStaged' are dropped.
    """
    runtime_fields = ('tasks', 'tasksRunning', 'tasksStaged')
    cleaned = []
    for app in MarathonClient.list_apps(self):
        parsed = json.loads(app.to_json())
        for field in runtime_fields:
            if field in parsed:
                del parsed[field]
        cleaned.append(parsed)
    return json.dumps(dict(enumerate(cleaned)))
def ajax_list_apps(request):
    """Render the (optionally filtered) list of Marathon apps, sorted by id.

    Every app gets ``tag_id`` (its id with "/" replaced by "__"). When the
    ``filter_name`` GET parameter is non-empty, only apps whose id contains
    it are kept.
    """
    mc = MarathonClient('http://{}:{}'.format(settings.MARATHON['host'], settings.MARATHON['port']))
    apps = sorted(mc.list_apps(), key=lambda app: app.id)
    for app in apps:
        app.tag_id = app.id.replace("/","__")
    filter_name = request.GET.get('filter_name', "")
    if filter_name != "":
        # One filtering pass instead of list.remove() inside a loop.
        apps = [app for app in apps if app.id.find(filter_name) != -1]
    data = {'apps': apps}
    return render(request, 'marathon_mgmt/ajax_list_apps.html', data)
def ajax_list_apps(request):
    """Render Marathon apps sorted by id, optionally filtered by id substring.

    ``tag_id`` (id with "/" replaced by "__") is set on every app. A non-empty
    ``filter_name`` GET parameter keeps only apps whose id contains it.
    """
    mc = MarathonClient('http://{}:{}'.format(settings.MARATHON['host'], settings.MARATHON['port']))
    apps = sorted(mc.list_apps(), key=lambda app: app.id)
    for app in apps:
        app.tag_id = app.id.replace("/", "__")
    filter_name = request.GET.get('filter_name', "")
    if filter_name != "":
        # Build the filtered list in one pass; removing from the list while
        # walking a copy was quadratic.
        apps = [app for app in apps if app.id.find(filter_name) != -1]
    data = {'apps': apps}
    return render(request, 'marathon_mgmt/ajax_list_apps.html', data)
def dashboard(request):
    """Render the dashboard; Marathon/Chronos failures do not break the page.

    NOTE(review): on failure the counters fall back to ``[]`` rather than an
    int. Kept as-is because the template's expectations are not visible
    here - confirm whether ``0`` is the intended fallback.
    """
    data = {}
    data["total_template"] = Template.objects.count()
    try:
        mc = MarathonClient("http://{}:{}".format(settings.MARATHON["host"], settings.MARATHON["port"]))
        data["total_app"] = len(mc.list_apps())
    except Exception:
        data["total_app"] = []
    try:
        cclient = chronos.connect("{}:{}".format(settings.CHRONOS["host"], settings.CHRONOS["port"]))
        # Single Chronos query; the job list used to be fetched twice.
        data["total_job"] = len(cclient.list())
    except Exception:
        data["total_job"] = []
    data["total_watcher"] = len(settings.WATCHER_THREADS)
    return render(request, "dashboard/dashboard.html", data)
def ports_used(request):
    """Render, per host, the sorted list of ports used by Marathon tasks.

    The template receives ``used_ports``: a list of ``[host, sorted_ports]``
    pairs ordered by host.
    """
    mc = MarathonClient('http://{}:{}'.format(settings.MARATHON['host'], settings.MARATHON['port']))
    used_ports = {}
    for app in mc.list_apps():
        for task in mc.list_tasks(app.id):
            # Always extend a list owned by this dict. Storing task.ports
            # itself made later extends mutate that task's own port list.
            used_ports.setdefault(task.host, []).extend(task.ports)
    list_host_ports = [[host, sorted(used_ports[host])]
                       for host in sorted(used_ports)]
    data = {}
    data['used_ports'] = list_host_ports
    return render(request, 'marathon_mgmt/ports_used.html', data)
def dashboard(request):
    """Render the dashboard; Marathon/Chronos failures do not break the page.

    NOTE(review): on failure the counters fall back to ``[]`` rather than an
    int. Kept as-is because the template's expectations are not visible
    here - confirm whether ``0`` is the intended fallback.
    """
    data = {}
    data['total_template'] = Template.objects.count()
    try:
        mc = MarathonClient('http://{}:{}'.format(settings.MARATHON['host'], settings.MARATHON['port']))
        data['total_app'] = len(mc.list_apps())
    except Exception:
        data['total_app'] = []
    try:
        cclient = chronos.connect('{}:{}'.format(settings.CHRONOS['host'], settings.CHRONOS['port']))
        # Single Chronos query; the job list used to be fetched twice.
        data['total_job'] = len(cclient.list())
    except Exception:
        data['total_job'] = []
    data['total_watcher'] = len(settings.WATCHER_THREADS)
    return render(request, 'dashboard/dashboard.html', data)
def put_app(client: MarathonClient, definition_path: str, fullrollback: bool) -> str:
    """Create or update the Marathon app(s) described at ``definition_path``.

    For a directory: asks for interactive confirmation, then deploys every
    regular file in it (sorted by name, names starting with '#' skipped) by
    calling itself per file, and returns ``definition_path``.

    For a file: loads the JSON app definition and returns the result of
    ``_update_application`` when an app with that id is already listed,
    otherwise the result of ``_create_application``.

    :param client: Marathon client used for listing and deploying.
    :param definition_path: JSON definition file, or a directory of them.
    :param fullrollback: for directories, roll back the already deployed
        apps when one deployment returns ``False``.
    """
    # None means "full rollback disabled"; a list collects per-file results.
    rollback_order = None
    if os.path.isdir(definition_path):
        prompt = input(
            'The path {} is a directory. Deploy applications defined in it?\nType \'YES\''
            ' to confirm: '.format(definition_path))
        if prompt != 'YES':
            print("Aborting")
            sys.exit(2)
        if fullrollback:
            print(
                'If you cancel any deployment, all previous applications (although successfully deployed) '
                'will be rolled back to their previous states.\nAre you totally sure?'
            )
            if input('Type \'YES\' to confirm: ') != 'YES':
                print('Aborting')
                sys.exit(2)
            rollback_order = []
        for definition_filename in sorted(os.listdir(definition_path)):
            definition_filepath = os.path.join(definition_path, definition_filename)
            if not definition_filename.startswith('#') and os.path.isfile(
                    definition_filepath):  # Commented files support
                # Nested calls never prompt for, or perform, a full rollback.
                deployed = put_app(client, definition_filepath, False)
                if deployed is False and rollback_order is not None:
                    # Initiate full rollback!!
                    rollback_order.sort(reverse=True)
                    do_full_rollback(client, rollback_order)
                    # NOTE(review): the loop keeps deploying the remaining
                    # files after the rollback - confirm this is intended.
                if rollback_order is not None:
                    rollback_order.append(deployed)
        return definition_path
    with open(definition_path) as json_file:
        app = MarathonApp.from_json(json.load(json_file))
    # Compare using an id with a leading slash.
    appid = app.id if app.id.startswith('/') else '/' + app.id
    if any(filter(lambda x: x.id == appid, client.list_apps())):
        return _update_application(client, app, definition_path)
    return _create_application(client, app, definition_path)
class Services(object):
    """Thin helper around a Marathon client for bulk app cleanup/registration."""

    def __init__(self, endpoints):
        """Create the Marathon client for ``endpoints``."""
        self.marathon = MarathonClient(endpoints)

    def list(self):
        """Return all apps known to Marathon."""
        return self.marathon.list_apps()

    def clean(self, pattern=None):
        """Force-delete apps whose id matches ``pattern`` (all apps if None).

        ``pattern`` is applied with ``re.match``, i.e. anchored at the start
        of the app id. A failure to delete one app is logged and does not
        stop the remaining deletions.
        """
        for app in self.list():
            try:
                if pattern is None or re.match(pattern, app.id) is not None:
                    logging.info("Deleting app: %s", app.id)
                    self.marathon.delete_app(app.id, force=True)
                else:
                    logging.info("Ignoring app %s. Did not match pattern %s", app.id, pattern)
            except Exception:
                # Best effort: report and continue. KeyboardInterrupt and
                # SystemExit are no longer swallowed.
                logger.info("Unable to delete app %s", app.id)
                traceback.print_exc()

    def register_services(self, service_registry="conf/marathon"):
        """Create one Marathon app per ``*json`` file in ``service_registry``.

        Each definition is converted with ``Names.snake_case`` and gets an
        empty ``tasks`` list before the ``MarathonApp`` is built. Creation
        errors are printed and the next file is processed.
        """
        for app_def in glob.glob(os.path.join(service_registry, "*json")):
            with open(app_def, "r") as stream:
                args = json.load(stream)
            app_id = args['id']
            args = Names.snake_case(args)
            logger.debug("Creating service: %s", json.dumps(args, indent=2))
            args['tasks'] = []
            app = MarathonApp(**args)
            try:
                logging.info("Creating app [id=>{0}]".format(app_id))
                self.marathon.create_app(app_id, app)
            except Exception:
                traceback.print_exc()
def deploy(app_definition, marathon_url, instances, auth_token, zero, force):
    """Deploy the app in ``app_definition`` and retire older apps of the service.

    :param app_definition: path to the Marathon app definition JSON file.
    :param marathon_url: Marathon endpoint.
    :param instances: target instance count, or None to derive it (2 for a
        first deployment, otherwise the count of an existing app).
    :param auth_token: token passed to the Marathon client.
    :param zero: 'Yes' for a zero-downtime (hybrid) release.
    :param force: 'Yes' to update the app in place when creation fails.

    Exits the process with an error message when the app cannot be created
    and ``force`` is not 'Yes'.
    """
    old_appids = []

    # Connect to Marathon
    print("\nConnecting to Marathon...")
    c = MarathonClient(marathon_url, auth_token=auth_token)
    print("Connected to", marathon_url)

    # Pick up the Marathon App Definition file (closed again right away)
    with open(app_definition) as definition_file:
        app = MarathonApp.from_json(json.load(definition_file))
    new_app_id = app.id
    service_name = new_app_id.split("/")[-1].split(".")[0]

    # Instantiate the new application on DC/OS but don't launch it yet
    # The application definition instances field should be 0 by default
    # If forced, the application will be relaunched even if the ID already exists
    print("\nInstantiating new application on Marathon with", app.instances,
          "instances...")
    try:
        c.create_app(new_app_id, app)
    except Exception as exc:
        if force == 'Yes':
            print("\nForcing redeploy of the same app id...", new_app_id)
            c.update_app(new_app_id, app, force=True, minimal=True)
            check_deployment(c, new_app_id)
        else:
            # Report the failure and exit non-zero instead of silently
            # exiting with status 0.
            sys.exit("Could not create app %s: %s" % (new_app_id, exc))
    print("Created app", new_app_id)

    # List and find currently running apps of the same service
    # This assumes the naming convention (id): /some/group/service_name.uniquevalue
    print("\nFinding any existing apps for service:", service_name)
    for existing_app in c.list_apps():
        existing_service_name = existing_app.id.split("/")[-1].split(".")[0]
        if (service_name == existing_service_name) and existing_app.instances > 0:
            print("Found up and running application id:", existing_app.id)
            old_appids.append(existing_app.id)

    # If it's the first deployment ever, just launch the desired number of instances
    # Otherwise perform a hybrid release
    # Finally clean up any older app instances running
    if not old_appids:
        if instances is None:
            instances = 2
        print("No current apps found. Launching brand new service with",
              instances, "instances...")
        c.scale_app(new_app_id, instances=instances)
        check_deployment(c, new_app_id)
        check_health(c, new_app_id)
    else:
        old_appids.reverse()
        if zero == 'Yes':
            print("\nStarting zero downtime deployment for...", new_app_id)
            for old_appid in old_appids:
                if instances is None:
                    instances = c.get_app(old_appid).instances
                if (old_appid == '' or old_appid == new_app_id
                        or old_appid == '/' + new_app_id):
                    print("Scaling existing app_id", new_app_id, "to",
                          instances, "instances...")
                    c.scale_app(new_app_id, instances=instances)
                    check_deployment(c, new_app_id)
                    check_health(c, new_app_id)
                else:
                    print("Target number of total instances:", instances)
                    # Move half of the target (at least one instance) first.
                    delta = int(round(instances * .50))
                    delta = (delta if delta > 0 else 1)
                    scale(c, new_app_id, old_appid, delta)
                    if (c.get_app(new_app_id).instances != instances):
                        print("\nLaunch", instances - delta,
                              "remaining instance(s) of the new version...")
                        c.scale_app(new_app_id, instances=instances)
                        check_deployment(c, new_app_id)
                        check_health(c, new_app_id)
                    if (c.get_app(old_appid).instances > 0):
                        print(
                            "Finish shutting down remaining instances of the old version..."
                        )
                        c.scale_app(old_appid, instances=0)
                        check_deployment(c, old_appid)
        else:
            print("Started deployment with downtime...")
            if instances is None:
                # Same default as the zero-downtime path: reuse the instance
                # count of the first old app, read before it is scaled to 0.
                instances = c.get_app(old_appids[0]).instances
            for old_appid in old_appids:
                c.scale_app(old_appid, instances=0)
                check_deployment(c, old_appid)
            c.scale_app(new_app_id, instances=instances)
            check_deployment(c, new_app_id)
            check_health(c, new_app_id)

    print("\nSUCCESS:\nNew application ID:", new_app_id,
          "\nRunning instances:", instances)
from time import time, sleep

from distributed import Client
from daskathon import MarathonCluster
from marathon import MarathonClient

# Import-time cleanup: force-delete every app on the local Marathon before
# any test in this module runs.
cg = MarathonClient('http://localhost:8080')
for app in cg.list_apps():
    cg.delete_app(app.id, force=True)


def test_multiple_workers():
    """Start a two-worker cluster and run one trivial task on it."""
    with MarathonCluster(nworkers=2, marathon='http://localhost:8080',
                         scheduler_port=9001, diagnostics_port=9101) as mc:
        # Poll until both workers have registered with the scheduler
        # (no timeout: the test hangs if they never come up).
        while len(mc.scheduler.workers) < 2:
            sleep(0.1)
        with Client(mc.scheduler_address) as c:
            x = c.submit(lambda x: x + 1, 1)
            assert x.result() == 2


def test_manual_scaling():
    """A cluster created without ``nworkers`` reports no cores at start."""
    with MarathonCluster(marathon='http://localhost:8080',
                         scheduler_port=9002, diagnostics_port=9102) as mc:
        assert not mc.scheduler.ncores
class HealthCheckBencher(object):
    """Benchmark Marathon health checks against a fleet of test apps.

    Creates apps named ``health-check-test-<suffix>``, waits until their
    tasks report health check results, drives the collector HTTP endpoints
    exposed by every task and writes the gathered timestamps to a CSV file.
    """

    def __init__(self, marathon_url, image, tasks):
        """Prepare the bench.

        :param marathon_url: Marathon endpoint.
        :param image: Docker image started for every task.
        :param tasks: total number of tasks (anything ``int()`` accepts).
        """
        self.concurrency = 20
        self.docker_image = image
        self.app_base_name = 'health-check-test-'
        self.total_tasks_cout = int(tasks)
        self.instances_per_app = 50
        # Compare the converted int; ``tasks`` itself may be a string.
        if self.total_tasks_cout < self.instances_per_app:
            self.instances_per_app = self.total_tasks_cout
            self.app_count = 1
        else:
            # Floor division: app_count feeds range(); a remainder of tasks
            # that does not fill a whole app is not started.
            self.app_count = self.total_tasks_cout // self.instances_per_app
        self.heath_check_interval = 30
        self.test_duration = 20
        self.marathon_cluster = MarathonClient(marathon_url, timeout=240)
        self.work_queue = Queue()
        self.result_queue = Queue()
        self.app_list_queue = Queue()
        # Callables run once per task; 'sleep=<minutes>' entries pause the test.
        self.action_list = [self.start_collect,
                            'sleep={}'.format(self.test_duration),
                            self.get_stats]

    def remove_apps(self):
        """Delete all bench apps and wait until Marathon lists none of them."""
        prefix = "/" + self.app_base_name
        for app in self.marathon_cluster.list_apps():
            if app.id.startswith(prefix):
                self.marathon_cluster.delete_app(app.id)
        # Same prefix as the deletion above and a fresh check per poll; the
        # old loop looked for ids without the leading "/" and never reset
        # its counter.
        while any(app.id.startswith(prefix)
                  for app in self.marathon_cluster.list_apps()):
            sleep(1)

    def create_app(self, id):
        """Create one bench app with a random name and queue that name.

        ``id`` is the work item handed over by ``repeat`` and is not used.
        """
        port_mapping = MarathonContainerPortMapping(container_port=80,
                                                    protocol="tcp")
        app_docker = MarathonDockerContainer(
            image=self.docker_image,
            network="BRIDGE",
            force_pull_image=True,
            port_mappings=[port_mapping])
        app_container = MarathonContainer(docker=app_docker)
        http_health_check = MarathonHealthCheck(
            protocol="HTTP",
            path="/status",
            grace_period_seconds=300,
            interval_seconds=self.heath_check_interval,
            timeout_seconds=20,
            max_consecutive_failures=0
        )
        # md5 needs bytes on Python 3.
        app_suffix = md5(str(random()).encode('utf-8')).hexdigest()
        app_name = self.app_base_name + app_suffix
        new_app = MarathonApp(cpus=CPUS, mem=MEM, disk=DISK,
                              container=app_container,
                              health_checks=[http_health_check],
                              instances=self.instances_per_app,
                              max_launch_delay_seconds=5)
        print("Creating {}".format(app_name))
        self.marathon_cluster.create_app(app_id=app_name, app=new_app)
        self.app_list_queue.put(app_name)
        return None

    def wait_instances(self, app_name):
        """Block until enough tasks of ``app_name`` have health check results."""
        health_ok = 0
        while health_ok < self.instances_per_app:
            tasks = self.marathon_cluster.list_tasks(app_name)
            health_ok = sum(1 for task in tasks if task.health_check_results)
            if health_ok < self.instances_per_app:
                # Pause between polls instead of hammering Marathon.
                sleep(1)

    @staticmethod
    def _task_url(task, endpoint):
        """Return the collector URL of ``task`` for ``endpoint``."""
        return 'http://'+task['host']+':'+str(task['port'])+'/'+endpoint

    def start_collect(self, task):
        """Ask the task's collector to start recording."""
        res = urlopen(self._task_url(task, 'start_collect'))
        if res.getcode() == 200:
            print(task['id']+': collecter was started')
        else:
            print(task['id']+': failed to start collecter')

    def stop_collect(self, task):
        """Ask the task's collector to stop recording."""
        res = urlopen(self._task_url(task, 'stop_collect'))
        if res.getcode() == 200:
            print(task['id']+': collecter was stopped')
        else:
            print(task['id']+': failed to stop collecter')

    def clear_stats(self, task):
        """Ask the task's collector to drop its recorded stats."""
        res = urlopen(self._task_url(task, 'clear_stats'))
        if res.getcode() == 200:
            print(task['id']+': stats was dropped')
        else:
            # Used to print the success message for failures as well.
            print(task['id']+': failed to drop stats')

    def get_stats(self, task):
        """Fetch the task's timestamps and put a result record on the queue."""
        try:
            res = urlopen(self._task_url(task, 'get_timestamps'))
        except Exception:
            print("URL req failed")
            self.result_queue.put({'id': task['id'],
                                   'status': 'Failed',
                                   'data': []})
            return
        code = res.getcode()
        if code == 200:
            data = res.read()
            if isinstance(data, bytes):
                # Python 3 returns bytes; split(',') needs text.
                data = data.decode('utf-8')
            timestamps = data.split(',')
            self.result_queue.put({'id': task['id'],
                                   'status': 'ok',
                                   'data': timestamps})
        elif code == 202:
            print("Collecting is not enabled")
            self.result_queue.put({'id': task['id'],
                                   'status': 'Collecting is not enabled',
                                   'data': []})
        else:
            print("Unknown response code")
            self.result_queue.put({'id': task['id'],
                                   'status': 'Unknown response code',
                                   'data': []})

    def repeat(self, action):
        """Worker loop: apply ``action`` to work items until the queue is empty.

        A failing item is reported and marked done, so one error can neither
        kill the worker nor leave ``work_queue.join()`` waiting forever.
        """
        while self.work_queue.empty() is False:
            try:
                iteration = self.work_queue.get_nowait()
            except Empty:
                continue
            try:
                action(iteration)
            except Exception as exc:
                print("{} failed for {!r}: {}".format(
                    getattr(action, '__name__', action), iteration, exc))
            finally:
                self.work_queue.task_done()

    def fill_queue(self, iterations):
        """Put every element of ``iterations`` on the work queue."""
        for iteration in iterations:
            self.work_queue.put(iteration)

    def get_tasks(self):
        """Return id, host and first port of every bench task as dicts."""
        res = []
        for task in self.marathon_cluster.list_tasks():
            if not task.id.startswith(self.app_base_name):
                continue
            res.append({'id': str(task.id),
                        'host': str(task.host),
                        'port': str(task.ports[0])})
        return res

    def _run_workers(self, action):
        """Drain the work queue with up to ``self.concurrency`` threads."""
        for _ in range(self.concurrency):
            if self.work_queue.empty() is True:
                break
            worker = Thread(target=self.repeat, args=(action,))
            worker.start()
        self.work_queue.join()

    def create_apps(self):
        """Create all bench apps, then wait for their tasks to report health."""
        self.fill_queue(range(self.app_count))
        self._run_workers(self.create_app)
        # Move the created app names over as the next batch of work.
        while self.app_list_queue.empty() is False:
            try:
                app_name = self.app_list_queue.get_nowait()
            except Empty:
                continue
            self.work_queue.put(app_name)
        self._run_workers(self.wait_instances)

    def start_test(self):
        """Run every entry of ``action_list`` against all bench tasks."""
        task_list = self.get_tasks()
        for action in self.action_list:
            if not callable(action):
                # String directive; 'sleep=<minutes>' pauses the test. Testing
                # callability works for both str and unicode directives.
                if action.startswith('sleep='):
                    amount = int(action.split('=')[1])
                    sleep(60*amount)
                continue
            self.fill_queue(task_list)
            self._run_workers(action)

    def generate_report(self):
        """Write all queued results to a timestamped CSV file."""
        today = datetime.today()
        file_prefix = "{:%Y-%m-%d_%H_%M_%S-}".format(today)
        file_name = (file_prefix + 'health_check_result-' +
                     str(self.total_tasks_cout) + 'tasks.csv')
        with open(file_name, "w") as f:
            f.write("Task ID,Health check timestamp")
            while self.result_queue.empty() is False:
                try:
                    result = self.result_queue.get_nowait()
                except Empty:
                    continue
                for timestamp in result['data']:
                    f.write("\n%s,%s" % (result['id'], timestamp))
class MarathonIF(object):
    """Marathon client wrapper that retries calls while Marathon is flaky."""

    def __init__(self, marathon_addr, my_addr, mesos):
        """Create the Marathon client and remember ``my_addr`` and ``mesos``."""
        self.mcli = MarathonClient(marathon_addr)
        self.myAddr = my_addr
        self.mesos = mesos

    def _retry(self, name, timeout, func, *args):
        """Call ``func(*args)`` until it succeeds or ``timeout`` seconds pass.

        Every failure is logged and followed by a 10 second pause. Raises
        ``Exception`` once the timeout has elapsed.
        """
        st_time = time.time()
        while time.time() - st_time < timeout:
            try:
                return func(*args)
            except Exception:
                l.info("mcli: %s returned error", name)
                l.info(traceback.format_exc())
                l.info("Retrying after 10 secs timeout=%d", timeout)
                time.sleep(10)
        raise Exception(
            "mcli %s timed out, possible zookeper/marathon/mesos malfunction" % name)

    def get_apps(self):
        """Return all apps known to Marathon."""
        listapps = self.mcli.list_apps()
        return listapps

    def get_app(self, app_id, timeout=300):
        """Return the app ``app_id``, or None if Marathon does not know it.

        Other errors are retried for up to ``timeout`` seconds, after which
        ``Exception`` is raised.
        """
        def fetch():
            try:
                return self.mcli.get_app(app_id)
            except marathon.exceptions.NotFoundError:
                return None

        return self._retry("get_app", timeout, fetch)

    def delete_app(self, app_id, force=False, timeout=200):
        """Delete ``app_id``, retrying on errors for up to ``timeout`` seconds."""
        self._retry("delete_app", timeout, self.mcli.delete_app, app_id, force)

    def delete_deployment(self, dep_id):
        """Delete the deployment ``dep_id``."""
        return self.mcli.delete_deployment(dep_id)

    def get_deployments(self):
        """Return the deployments currently listed by Marathon."""
        return self.mcli.list_deployments()

    def delete_app_ifexisting(self, app_id, trys=4):
        """Delete ``app_id`` when it exists; make up to ``trys`` attempts.

        Returns None when the app does not exist. Re-raises the last error
        when every attempt failed.
        """
        last_error = None
        for idx in range(0, trys):
            try:
                a = self.get_app(app_id)
                if a:
                    return self.delete_app(app_id)
                return None
            except Exception as exc:
                # Keep the exception: a bare ``raise`` after the loop has no
                # active exception on Python 3.
                last_error = exc
                e = sys.exc_info()[0]
                pprint("<p>Error: %s</p>" % e)
                time.sleep(10)
        if last_error is None:
            raise Exception("delete_app_ifexisting needs trys >= 1, got %r" % (trys,))
        raise last_error

    @staticmethod
    def is_valid_app_id(app_id):
        """Return True when ``app_id`` only uses characters the regex permits.

        The pattern accepts ASCII letters of either case, digits, hyphens,
        slashes and dots; the empty string is accepted as well.
        NOTE(review): the error message in ``create_app`` speaks of lowercase
        letters only - confirm which of the two is intended.
        """
        return bool(re.match("^[A-Za-z0-9-/.]*$", app_id))

    def create_app(self, app_id, attr):
        """
        Create and start an app.

        :param app_id: (str) - Application ID
        :param attr: marathon.models.app.MarathonApp application to create.
        :return: the created app
        :raises Exception: when ``app_id`` fails ``is_valid_app_id``.
        :raises marathon.exceptions.MarathonHttpError: on any HTTP error
            other than "app is locked", or when the app is still locked
            after 10 attempts.
        """
        # Validate that app_id conforms to allowed naming scheme.
        if not self.is_valid_app_id(app_id):
            l.error("Error: Only lowercase letters, digits, hyphens are allowed in app_id. %s" % app_id)
            raise Exception("Invalid app_id")
        last_error = None
        for idx in range(0, 10):
            try:
                return self.mcli.create_app(app_id, attr)
            except marathon.exceptions.MarathonHttpError as e:
                if str(e).find('App is locked by one or more deployments. Override with the option') >= 0:
                    # Another deployment holds the app; retry shortly.
                    last_error = e
                    time.sleep(1)
                else:
                    raise
        # Still locked after all attempts: surface the last error explicitly.
        raise last_error

    def wait_app_removal(self, app):
        """Poll until ``app`` is gone from Marathon, then return True."""
        cnt = 0
        while True:
            if not self.get_app(app):
                break
            time.sleep(0.2)
            cnt += 1
        if cnt > 0:
            l.info("Stuck waiting for %s to be deleted CNT=%d" % (app, cnt))
        return True

    def wait_app_ready(self, app, running_count, sleep_before_next_try=1):
        """Poll until ``app`` has exactly ``running_count`` running tasks.

        Scales the app down when more tasks than expected are running.
        Returns the app object once the count matches; there is no timeout.
        """
        cnt = 0
        while True:
            a1 = self.get_app(app)
            # if tasks_running are greater (due to whatever reason, scale down accordingly)
            if a1.tasks_running > running_count:
                delta = a1.tasks_running - running_count
                l.info("Found [%d] more apps, scaling down to [%d]", delta, running_count)
                self.scale_app(app, running_count)
                # Allow for some time before next poll
                time.sleep(1)
                continue
            if a1.tasks_running == running_count:
                return a1
            cnt += 1
            time.sleep(sleep_before_next_try)
            if (cnt % 30) == 29:
                l.info("[%d]Waiting for task to move to running stage, " % cnt +
                       "current stat staged=%d running=%d expected Running=%d" %
                       (a1.tasks_staged, a1.tasks_running, running_count))

    def scale_app(self, app, scale, timeout=300):
        """Scale ``app`` to ``scale`` instances, retrying for ``timeout`` seconds."""
        self._retry("scale_app", timeout, self.mcli.scale_app, app, scale)

    def ping(self):
        """Ping Marathon and return the client's answer."""
        return self.mcli.ping()

    def kill_task(self, app_id, task_id):
        """Kill the task ``task_id`` of app ``app_id``."""
        return self.mcli.kill_task(app_id, task_id)
from marathon import MarathonClient
import json
import pandas as pd

# Reference: https://github.com/thefactory/marathon-python
server = "http://10.20.0.3:8080"
maraclient = MarathonClient(servers=server)

# Fetch the information of every app
app_list = maraclient.list_apps()

# One record per app: short name (last path segment of the id), instance
# count, memory per instance and total memory (instances * memory).
app_calc_list = [
    {
        "模块名称": str(entry.id).split("/")[-1],
        "实例数量": entry.instances,
        "内存": entry.mem,
        "总内存": entry.instances * entry.mem
    }
    for entry in app_list
]

print(app_calc_list)
df = pd.DataFrame(app_calc_list)
class HealthCheckBencher(object):
    """Benchmark Marathon health checks against a fleet of test apps.

    Creates apps named ``health-check-test-<suffix>``, waits until their
    tasks report health check results, drives the collector HTTP endpoints
    exposed by every task and writes the gathered timestamps to a CSV file.
    """

    def __init__(self, marathon_url, image, tasks):
        """Prepare the bench.

        :param marathon_url: Marathon endpoint.
        :param image: Docker image started for every task.
        :param tasks: total number of tasks (anything ``int()`` accepts).
        """
        self.concurrency = 20
        self.docker_image = image
        self.app_base_name = 'health-check-test-'
        self.total_tasks_cout = int(tasks)
        self.instances_per_app = 50
        # Compare the converted int; ``tasks`` itself may be a string.
        if self.total_tasks_cout < self.instances_per_app:
            self.instances_per_app = self.total_tasks_cout
            self.app_count = 1
        else:
            # Floor division: app_count feeds range(); a remainder of tasks
            # that does not fill a whole app is not started.
            self.app_count = self.total_tasks_cout // self.instances_per_app
        self.heath_check_interval = 30
        self.test_duration = 20
        self.marathon_cluster = MarathonClient(marathon_url, timeout=240)
        self.work_queue = Queue()
        self.result_queue = Queue()
        self.app_list_queue = Queue()
        # Callables run once per task; 'sleep=<minutes>' entries pause the test.
        self.action_list = [self.start_collect,
                            'sleep={}'.format(self.test_duration),
                            self.get_stats]

    def remove_apps(self):
        """Delete all bench apps and wait until Marathon lists none of them."""
        prefix = "/" + self.app_base_name
        for app in self.marathon_cluster.list_apps():
            if app.id.startswith(prefix):
                self.marathon_cluster.delete_app(app.id)
        # Same prefix as the deletion above and a fresh check per poll; the
        # old loop looked for ids without the leading "/" and never reset
        # its counter.
        while any(app.id.startswith(prefix)
                  for app in self.marathon_cluster.list_apps()):
            sleep(1)

    def create_app(self, id):
        """Create one bench app with a random name and queue that name.

        ``id`` is the work item handed over by ``repeat`` and is not used.
        """
        port_mapping = MarathonContainerPortMapping(container_port=80,
                                                    protocol="tcp")
        app_docker = MarathonDockerContainer(
            image=self.docker_image,
            network="BRIDGE",
            force_pull_image=True,
            port_mappings=[port_mapping])
        app_container = MarathonContainer(docker=app_docker)
        http_health_check = MarathonHealthCheck(
            protocol="HTTP",
            path="/status",
            grace_period_seconds=300,
            interval_seconds=self.heath_check_interval,
            timeout_seconds=20,
            max_consecutive_failures=0
        )
        # md5 needs bytes on Python 3.
        app_suffix = md5(str(random()).encode('utf-8')).hexdigest()
        app_name = self.app_base_name + app_suffix
        new_app = MarathonApp(cpus=CPUS, mem=MEM, disk=DISK,
                              container=app_container,
                              health_checks=[http_health_check],
                              instances=self.instances_per_app,
                              max_launch_delay_seconds=5)
        print("Creating {}".format(app_name))
        self.marathon_cluster.create_app(app_id=app_name, app=new_app)
        self.app_list_queue.put(app_name)
        return None

    def wait_instances(self, app_name):
        """Block until enough tasks of ``app_name`` have health check results."""
        health_ok = 0
        while health_ok < self.instances_per_app:
            tasks = self.marathon_cluster.list_tasks(app_name)
            health_ok = sum(1 for task in tasks if task.health_check_results)
            if health_ok < self.instances_per_app:
                # Pause between polls instead of hammering Marathon.
                sleep(1)

    @staticmethod
    def _task_url(task, endpoint):
        """Return the collector URL of ``task`` for ``endpoint``."""
        return 'http://'+task['host']+':'+str(task['port'])+'/'+endpoint

    def start_collect(self, task):
        """Ask the task's collector to start recording."""
        res = urlopen(self._task_url(task, 'start_collect'))
        if res.getcode() == 200:
            print(task['id']+': collecter was started')
        else:
            print(task['id']+': failed to start collecter')

    def stop_collect(self, task):
        """Ask the task's collector to stop recording."""
        res = urlopen(self._task_url(task, 'stop_collect'))
        if res.getcode() == 200:
            print(task['id']+': collecter was stopped')
        else:
            print(task['id']+': failed to stop collecter')

    def clear_stats(self, task):
        """Ask the task's collector to drop its recorded stats."""
        res = urlopen(self._task_url(task, 'clear_stats'))
        if res.getcode() == 200:
            print(task['id']+': stats was dropped')
        else:
            # Used to print the success message for failures as well.
            print(task['id']+': failed to drop stats')

    def get_stats(self, task):
        """Fetch the task's timestamps and put a result record on the queue."""
        try:
            res = urlopen(self._task_url(task, 'get_timestamps'))
        except Exception:
            print("URL req failed")
            self.result_queue.put({'id': task['id'],
                                   'status': 'Failed',
                                   'data': []})
            return
        code = res.getcode()
        if code == 200:
            data = res.read()
            if isinstance(data, bytes):
                # Python 3 returns bytes; split(',') needs text.
                data = data.decode('utf-8')
            timestamps = data.split(',')
            self.result_queue.put({'id': task['id'],
                                   'status': 'ok',
                                   'data': timestamps})
        elif code == 202:
            print("Collecting is not enabled")
            self.result_queue.put({'id': task['id'],
                                   'status': 'Collecting is not enabled',
                                   'data': []})
        else:
            print("Unknown response code")
            self.result_queue.put({'id': task['id'],
                                   'status': 'Unknown response code',
                                   'data': []})

    def repeat(self, action):
        """Worker loop: apply ``action`` to work items until the queue is empty.

        A failing item is reported and marked done, so one error can neither
        kill the worker nor leave ``work_queue.join()`` waiting forever.
        """
        while self.work_queue.empty() is False:
            try:
                iteration = self.work_queue.get_nowait()
            except Empty:
                continue
            try:
                action(iteration)
            except Exception as exc:
                print("{} failed for {!r}: {}".format(
                    getattr(action, '__name__', action), iteration, exc))
            finally:
                self.work_queue.task_done()

    def fill_queue(self, iterations):
        """Put every element of ``iterations`` on the work queue."""
        for iteration in iterations:
            self.work_queue.put(iteration)

    def get_tasks(self):
        """Return id, host and first port of every bench task as dicts."""
        res = []
        for task in self.marathon_cluster.list_tasks():
            if not task.id.startswith(self.app_base_name):
                continue
            res.append({'id': str(task.id),
                        'host': str(task.host),
                        'port': str(task.ports[0])})
        return res

    def _run_workers(self, action):
        """Drain the work queue with up to ``self.concurrency`` threads."""
        for _ in range(self.concurrency):
            if self.work_queue.empty() is True:
                break
            worker = Thread(target=self.repeat, args=(action,))
            worker.start()
        self.work_queue.join()

    def create_apps(self):
        """Create all bench apps, then wait for their tasks to report health."""
        self.fill_queue(range(self.app_count))
        self._run_workers(self.create_app)
        # Move the created app names over as the next batch of work.
        while self.app_list_queue.empty() is False:
            try:
                app_name = self.app_list_queue.get_nowait()
            except Empty:
                continue
            self.work_queue.put(app_name)
        self._run_workers(self.wait_instances)

    def start_test(self):
        """Run every entry of ``action_list`` against all bench tasks."""
        task_list = self.get_tasks()
        for action in self.action_list:
            if not callable(action):
                # String directive; 'sleep=<minutes>' pauses the test. Testing
                # callability avoids the Python-2-only ``basestring`` name.
                if action.startswith('sleep='):
                    amount = int(action.split('=')[1])
                    sleep(60*amount)
                continue
            self.fill_queue(task_list)
            self._run_workers(action)

    def generate_report(self):
        """Write all queued results to a timestamped CSV file."""
        today = datetime.today()
        file_prefix = "{:%Y-%m-%d_%H_%M_%S-}".format(today)
        file_name = (file_prefix + 'health_check_result-' +
                     str(self.total_tasks_cout) + 'tasks.csv')
        with open(file_name, "w") as f:
            f.write("Task ID,Health check timestamp")
            while self.result_queue.empty() is False:
                try:
                    result = self.result_queue.get_nowait()
                except Empty:
                    continue
                for timestamp in result['data']:
                    f.write("\n%s,%s" % (result['id'], timestamp))
def list_applications(client: "MarathonClient") -> list:
    """Return ``(app id, docker image)`` for every app listed by ``client``.

    The image is ``None`` for apps without a Docker container instead of
    failing the whole listing with ``AttributeError``.
    """
    listing = []
    for app in client.list_apps():
        container = getattr(app, "container", None)
        docker = getattr(container, "docker", None)
        listing.append((app.id, getattr(docker, "image", None)))
    return listing
def main(args):
    """Migrate Marathon tasks away from the hosts given in ``args.hosts``.

    ``args.hosts`` is a comma separated host list (surrounding quotes are
    dropped), ``args.url`` is the Marathon endpoint and ``args.force`` is
    forwarded unchanged to ``migrate_tasks``.
    """
    migration_hosts = args.hosts.replace('"','').replace('\'','').split(',')
    marathon_client = MarathonClient(args.url)

    # Get the running marathon application dictionary
    running_instances = utils.dict_by_key_and_value(lambda x: x.id,
                                                    lambda y: y.instances,
                                                    marathon_client.list_apps())
    print(">>> Total Running Applications: ")
    print(json.dumps(running_instances, sort_keys=True, indent=4,
                     separators=(',', ': ')))

    # Get the running marathon applications for all hosts which are going for maintenance
    all_tasks = marathon_client.list_tasks()
    filtered_tasks = [task for task in all_tasks if task.host in migration_hosts]
    dicted_tasks = utils.dict_by_key(lambda x: x.app_id, filtered_tasks)
    print(">>> Total Running Application: ")
    # list(): a Python 3 dict view is not JSON serializable.
    print(json.dumps(list(dicted_tasks.keys()), sort_keys=True, indent=4,
                     separators=(',', ': ')))

    # Tasks migration
    migrate_tasks(marathon_client, dicted_tasks, migration_hosts, args.force)
class MarathonManager(object):
    """Manage apps on one Marathon server, grouped by ``_tonomi_*`` labels."""

    def __init__(self, server):
        """Create the Marathon client for ``server``."""
        # Kept for __repr__, which previously read an attribute that was
        # never assigned.
        self.server = server
        self._client = MarathonClient(server)

    def __repr__(self):
        return str(self.server)

    def create(self, app):
        """Let ``app`` create itself through this manager's client."""
        app._create(self._client)

    def discover(self, app_filter=None, env_filter=False):
        """Return the distinct application or environment names in Marathon.

        :param app_filter: only consider apps whose '_tonomi_application'
            label equals this value; falsy means all apps.
        :param env_filter: when true, return '/<environment>' names taken
            from the '_tonomi_environment' label; otherwise return
            ``reduce_app_name(app.id)`` for every considered app.
        """
        names = set()
        for app in self._client.list_apps():
            if app_filter and ('_tonomi_application', app_filter) not in app.labels.items():
                continue
            if not env_filter:
                names.add(reduce_app_name(app.id))
            elif '_tonomi_environment' in app.labels:
                names.add('/{}'.format(app.labels['_tonomi_environment']))
        return list(names)

    def get_apps(self, app_type, env_name):
        """Return the full app objects of ``app_type`` in ``env_name``.

        Slashes are removed from ``env_name`` before it is compared with the
        '_tonomi_environment' label. Each match is re-fetched with
        ``get_app``.
        """
        env_name = env_name.replace('/', '')
        matching = [
            app for app in self._client.list_apps()
            if ('_tonomi_environment', env_name) in app.labels.items()
            and ('_tonomi_application', app_type) in app.labels.items()
        ]
        return [self._client.get_app(app.id) for app in matching]

    def get_app_host(self, app_type, env_name):
        """Return the host of the first task found for the matching apps.

        Polls every 5 seconds until a task exists; there is no timeout.
        """
        while True:
            for app in self.get_apps(app_type=app_type, env_name=env_name):
                for task in app.tasks:
                    return task.host
            time.sleep(5)

    def health_check(self):
        """Not implemented."""
        pass

    def destroy(self, name):
        """Force-delete the group ``name``; failures are ignored."""
        try:
            self._client.delete_group(name, force=True)
        except Exception:
            # Deliberate best effort; KeyboardInterrupt and SystemExit are
            # no longer swallowed.
            pass

    def update(self):
        """Not implemented."""
        pass

    def restart(self):
        """Not implemented."""
        pass

    def scale_app(self, app_name, num):
        """Force-scale ``app_name`` to ``num`` instances."""
        self._client.scale_app(app_name, num, force=True)

    def free_ports(self, num=1):
        """Return the result of ``get_free_ports`` for ``num`` ports."""
        return get_free_ports(self._client, num)
from marathon import MarathonClient

# Print every app known to the Marathon master.
marathon = MarathonClient(servers=['http://mesos-master0:8080'])
for app in marathon.list_apps():
    # print() call: valid on Python 2 and Python 3.
    print(app)
class MarathonIF(object):
    """Marathon client wrapper that retries several calls until a timeout."""

    def __init__(self, marathon_addr, my_addr, mesos):
        self.mcli = MarathonClient(marathon_addr)
        self.myAddr = my_addr
        self.mesos = mesos

    def get_apps(self):
        """Return whatever the client's ``list_apps`` returns."""
        return self.mcli.list_apps()

    def get_app(self, app_id, timeout=300):
        """Fetch ``app_id``, retrying every 10 seconds on errors.

        :return: the app, or ``None`` when Marathon reports it as not found.
        :raises Exception: when ``timeout`` seconds elapse without an answer.
        """
        st_time = time.time()
        while (time.time() - st_time < timeout):
            try:
                try:
                    return self.mcli.get_app(app_id)
                except marathon.exceptions.NotFoundError:
                    return None
            except Exception:
                # Only ``Exception`` is retried so that KeyboardInterrupt /
                # SystemExit are not swallowed by the retry loop.
                l.info("mcli: get_app returned error")
                l.info(traceback.format_exc())
                l.info("Retrying after 10 secs timeout=%d", timeout)
                time.sleep(10)
        raise Exception(
            "mcli get_app timed out, possible zookeper/marathon/mesos malfunction"
        )

    def delete_app(self, app_id, force=False, timeout=200):
        """Delete ``app_id``, retrying every 10 seconds on errors.

        :raises Exception: when ``timeout`` seconds elapse without success.
        """
        st_time = time.time()
        while (time.time() - st_time < timeout):
            try:
                self.mcli.delete_app(app_id, force)
                return
            except Exception:
                l.info("mcli: delete_app returned error")
                l.info(traceback.format_exc())
                l.info("Retrying after 10 secs timeout=%d", timeout)
                time.sleep(10)
        raise Exception(
            "mcli delete_app timed out, possible zookeper/marathon/mesos malfunction"
        )

    def delete_deployment(self, dep_id):
        return self.mcli.delete_deployment(dep_id)

    def get_deployments(self):
        return self.mcli.list_deployments()

    def delete_app_ifexisting(self, app_id, trys=4):
        """Delete ``app_id`` if it exists, making up to ``trys`` attempts.

        :return: the result of ``delete_app`` or ``None`` if the app is absent.
        :raises: the error from the last failed attempt once every attempt
            has failed; ``RuntimeError`` if no attempt was made at all.
        """
        last_err = None
        for _ in range(0, trys):
            try:
                if self.get_app(app_id):
                    return self.delete_app(app_id)
                return None
            except Exception as err:
                # Keep the error: a bare ``raise`` after the loop has no
                # active exception on Python 3.
                last_err = err
                pprint("<p>Error: %s</p>" % sys.exc_info()[0])
                time.sleep(10)
        if last_err is None:
            raise RuntimeError(
                "delete_app_ifexisting made no attempt (trys=%r)" % (trys,))
        raise last_err

    @staticmethod
    def is_valid_app_id(app_id):
        """Return True when ``app_id`` only uses the accepted characters.

        Accepted: ASCII letters (either case), digits, hyphen, slash, dot.
        The empty string is accepted as well.
        """
        # NOTE(review): the log message in create_app says "lowercase" but
        # this pattern also accepts upper-case letters - confirm the intent.
        return bool(re.match(r"^[A-Za-z0-9-/.]*$", app_id))

    def create_app(self, app_id, attr):
        """
            Create and start an app.
            :param app_id: (str) - Application ID
            :param attr: marathon.models.app.MarathonApp application to create.
            :return: the created app
            :raises Exception: when ``app_id`` fails validation.
            :raises marathon.exceptions.MarathonHttpError: immediately for
                errors other than "locked by deployments", or after 10
                attempts when the app stays locked.
        """
        # Validate that app_id conforms to allowed naming scheme.
        if not self.is_valid_app_id(app_id):
            l.error(
                "Error: Only lowercase letters, digits, hyphens are allowed in app_id. %s"
                % app_id)
            raise Exception("Invalid app_id")
        last_err = None
        for _ in range(0, 10):
            try:
                return self.mcli.create_app(app_id, attr)
            except marathon.exceptions.MarathonHttpError as e:
                if 'App is locked by one or more deployments. Override with the option' not in str(e):
                    raise
                # Locked by a running deployment: wait and try again.
                last_err = e
                time.sleep(1)
        # Re-raise the last "locked" error explicitly; a bare ``raise`` here
        # would fail with "No active exception" on Python 3.
        raise last_err

    def wait_app_removal(self, app):
        """Poll every 0.2 seconds until ``get_app`` no longer returns the app."""
        cnt = 0
        while True:
            if not self.get_app(app):
                break
            time.sleep(0.2)
            cnt += 1
            if cnt > 0:
                l.info("Stuck waiting for %s to be deleted CNT=%d" % (app, cnt))
        return True

    def wait_app_ready(self, app, running_count, sleep_before_next_try=1):
        """Wait until ``app`` has exactly ``running_count`` running tasks.

        When more tasks than requested are running the app is scaled down
        to ``running_count`` before polling again.

        :return: the app object from the poll that matched.
        """
        cnt = 0
        while True:
            a1 = self.get_app(app)
            # if tasks_running are greater (due to whatever reason, scale down accordingly)
            if a1.tasks_running > running_count:
                delta = a1.tasks_running - running_count
                l.info("Found [%d] more apps, scaling down to [%d]", delta,
                       running_count)
                self.scale_app(app, running_count)
                # Allow for some time before next poll
                time.sleep(1)
                continue
            if a1.tasks_running == running_count:
                return a1
            cnt += 1
            time.sleep(sleep_before_next_try)
            if (cnt % 30) == 29:
                l.info(
                    "[%d]Waiting for task to move to running stage, " % cnt +
                    "current stat staged=%d running=%d expected Running=%d" %
                    (a1.tasks_staged, a1.tasks_running, running_count))

    def scale_app(self, app, scale, timeout=300):
        """Scale ``app`` to ``scale``, retrying every 10 seconds on errors.

        :raises Exception: when ``timeout`` seconds elapse without success.
        """
        st_time = time.time()
        while (time.time() - st_time < timeout):
            try:
                self.mcli.scale_app(app, scale)
                return
            except Exception:
                l.info("mcli: scale_app returned error")
                l.info(traceback.format_exc())
                l.info("Retrying after 10 secs timeout=%d", timeout)
                time.sleep(10)
        raise Exception(
            "mcli scale_app timed out, possible zookeper/marathon/mesos malfunction"
        )

    def ping(self):
        return self.mcli.ping()

    def kill_task(self, app_id, task_id):
        return self.mcli.kill_task(app_id, task_id)
def get_apps_dict(self):
    """Map every listed app id to the result of ``self.get_app_dict(id)``."""
    return {
        app.id: self.get_app_dict(app.id)
        for app in MarathonClient.list_apps(self)
    }
def list_app(request):
    """Render the app list page with the apps reported by Marathon.

    The Marathon endpoint is built from ``settings.MARATHON`` (``host`` and
    ``port``). The template context carries the apps plus a ``refresh``
    value of 3000.
    """
    marathon_url = "http://{}:{}".format(settings.MARATHON["host"],
                                         settings.MARATHON["port"])
    client = MarathonClient(marathon_url)
    context = {"apps": client.list_apps(), "refresh": 3000}
    return render(request, "marathon_mgmt/list_app.html", context)
class MarathonIF(object):
    """Small wrapper around ``MarathonClient`` with a few retry helpers."""

    def __init__(self, marathon_addr, my_addr, mesos):
        self.mcli = MarathonClient(marathon_addr)
        self.myAddr = my_addr
        self.mesos = mesos

    def get_apps(self):
        """Return whatever the client's ``list_apps`` returns."""
        return self.mcli.list_apps()

    def get_app(self, app_id):
        """Return the app ``app_id`` or ``None`` when Marathon reports it missing."""
        try:
            return self.mcli.get_app(app_id)
        except marathon.exceptions.NotFoundError:
            return None

    def delete_app(self, app_id, force=False):
        return self.mcli.delete_app(app_id, force)

    def delete_deployment(self, dep_id):
        return self.mcli.delete_deployment(dep_id)

    def get_deployments(self):
        return self.mcli.list_deployments()

    def delete_app_ifexisting(self, app_id, trys=4):
        """Delete ``app_id`` if it exists, making up to ``trys`` attempts.

        A failed attempt is reported with ``pprint`` and followed by a
        10 second pause.

        :return: the result of ``delete_app`` or ``None`` if the app is absent.
        :raises: the error from the last failed attempt once every attempt
            has failed; ``RuntimeError`` if no attempt was made at all.
        """
        last_err = None
        for _ in range(0, trys):
            try:
                if self.get_app(app_id):
                    return self.delete_app(app_id)
                return None
            except Exception as err:
                # Keep the error: a bare ``raise`` after the loop has no
                # active exception on Python 3.
                last_err = err
                pprint("<p>Error: %s</p>" % sys.exc_info()[0])
                time.sleep(10)
        if last_err is None:
            raise RuntimeError(
                "delete_app_ifexisting made no attempt (trys=%r)" % (trys,))
        raise last_err

    def create_app(self, app_id, attr):
        """Create ``app_id``, retrying while it is locked by a deployment.

        Up to 10 attempts are made, one second apart.

        :return: the value returned by the client's ``create_app``.
        :raises marathon.exceptions.MarathonHttpError: immediately for any
            other HTTP error, or after the last attempt when still locked.
        """
        last_err = None
        for _ in range(0, 10):
            try:
                return self.mcli.create_app(app_id, attr)
            except marathon.exceptions.MarathonHttpError as e:
                if 'App is locked by one or more deployments. Override with the option' not in str(e):
                    raise
                last_err = e
                time.sleep(1)
        # Re-raise the last "locked" error explicitly; a bare ``raise`` here
        # would fail with "No active exception" on Python 3.
        raise last_err

    def wait_app_removal(self, app):
        """Poll every 0.2 seconds until ``get_app`` no longer returns the app."""
        cnt = 0
        while True:
            if not self.get_app(app):
                break
            time.sleep(0.2)
            cnt += 1
            if cnt > 0:
                l.info("Stuck waiting for %s to be deleted CNT=%d" % (app, cnt))
        return True

    def wait_app_ready(self, app, running_count):
        """Poll once a second until ``app`` has ``running_count`` running tasks.

        :return: the app object from the poll that matched.
        """
        cnt = 0
        while True:
            a1 = self.get_app(app)
            if a1.tasks_running == running_count:
                return a1
            cnt += 1
            time.sleep(1)
            if (cnt % 30) == 29:
                l.info("[%d]Waiting for task to move to running stage, " % cnt +
                       "current stat staged=%d running=%d expected Running=%d" %
                       (a1.tasks_staged, a1.tasks_running, running_count))

    def scale_app(self, app, scale):
        return self.mcli.scale_app(app, scale)

    def ping(self):
        return self.mcli.ping()
class MarathonAPI(object):
    """Tracks Marathon apps that carry autoscale labels and scales them."""

    user = None
    password = None
    host = None
    use_https = False
    port = None
    url = None
    marathon_cli = None
    # Class-level default only. __init__ gives every instance its own dict
    # so that apps discovered by one instance do not leak into another.
    dict_apps = {}

    def __init__(self, host, port=80, use_https=False, user=None, password=None):
        """Build the Marathon URL and create the client.

        :raises Exception: whatever ``MarathonClient`` raises; it is logged
            at critical level and re-raised unchanged.
        """
        self.user = user
        self.password = password
        self.host = host
        self.use_https = use_https
        self.port = str(port)
        self.url = '{}://{}:{}/'.format('https' if use_https else 'http', host,
                                        port)
        self.dict_apps = {}
        try:
            self.marathon_cli = MarathonClient([self.url],
                                               username=self.user,
                                               password=self.password)
        except Exception as e:
            logger.critical(e)
            raise

    @staticmethod
    def _label_value(raw):
        """Return ``raw`` as an int when it is numeric, otherwise unchanged."""
        return int(raw) if raw.isnumeric() else raw

    def scaleOneApp(self, app_id, delta=None):
        """Scale ``app_id`` by ``delta`` instances.

        ``app_id`` must already be present in ``self.dict_apps``. A
        ``MarathonHttpError`` is logged and swallowed; other errors propagate.
        """
        # ``delta`` defaults to None, which cannot be compared with 0 on
        # Python 3; treat it like a non-positive delta for the log line.
        direction = 'up' if delta is not None and delta > 0 else 'down'
        logger.info(
            'App: [{}] :: Scale {} Delta:[{}] Atual:[{}] Staged:[{}]'.format(
                app_id, direction, delta, self.dict_apps[app_id].tasksRunning,
                self.dict_apps[app_id].tasksStaged))
        try:
            self.marathon_cli.scale_app(app_id=app_id, delta=delta)
        except MarathonHttpError as e:
            logger.error(e.error_message)

    def findAppsWithAutoscaleLabels(self):
        """Refresh ``self.dict_apps`` from the apps that enable autoscaling.

        Every app carrying ``LABEL_FOR_AUTOSCALE_ENABLE`` is stored under its
        id, with its running/staged counts and its mandatory/optional labels
        copied as attributes (numeric label values become ints). A missing
        mandatory label is only logged as an error.
        """
        apps = self.marathon_cli.list_apps(embed_counts=True,
                                           embed_task_stats=True)
        logger.debug('Lista recebida {}'.format(apps))
        if len(apps) == 0:
            logger.warning('0 apps loaded. Your marathon have apps?')
        for app in apps:
            if LABEL_FOR_AUTOSCALE_ENABLE not in app.labels:
                logger.debug(
                    'App: [{}] :: dont have {} = True. If you want to scale, please add labels.'
                    .format(app.id, LABEL_FOR_AUTOSCALE_ENABLE))
                continue
            new_app = MarathonApp(app.id)
            new_app.tasksRunning = app.tasks_running
            new_app.tasksStaged = app.tasks_staged
            for label in MANDATORY_LABELS_APP:
                if label in app.labels:
                    setattr(new_app, label,
                            self._label_value(app.labels[label]))
                else:
                    logger.error(
                        'App: [{}] :: dont have MANDATORY_LABELS :: {}'.
                        format(app.id, label))
            for label in OPTIONAL_LABELS_APP:
                if label in app.labels:
                    setattr(new_app, label,
                            self._label_value(app.labels[label]))
            self.dict_apps[app.id] = new_app

    def scaleApps(self, rabbitmq):
        """Scale every tracked app whose ``scaneRequired`` asks for it."""
        for app_id in self.dict_apps:
            app = self.dict_apps[app_id]
            required, delta = app.scaneRequired(rabbitmq)
            if required:
                self.scaleOneApp(app_id=app_id, delta=delta)
            else:
                logger.info('App: [{}] :: Not Required Scale'.format(app_id))
class MarathonIF(object):
    """Wrapper around ``MarathonClient`` with app-id validation and retries."""

    def __init__(self, marathon_addr, my_addr, mesos):
        self.mcli = MarathonClient(marathon_addr)
        self.myAddr = my_addr
        self.mesos = mesos

    def get_apps(self):
        """Return whatever the client's ``list_apps`` returns."""
        return self.mcli.list_apps()

    def get_app(self, app_id):
        """Return the app ``app_id`` or ``None`` when Marathon reports it missing."""
        try:
            return self.mcli.get_app(app_id)
        except marathon.exceptions.NotFoundError:
            return None

    def delete_app(self, app_id, force=False):
        return self.mcli.delete_app(app_id, force)

    def delete_deployment(self, dep_id):
        return self.mcli.delete_deployment(dep_id)

    def get_deployments(self):
        return self.mcli.list_deployments()

    def delete_app_ifexisting(self, app_id, trys=4):
        """Delete ``app_id`` if it exists, making up to ``trys`` attempts.

        :return: the result of ``delete_app`` or ``None`` if the app is absent.
        :raises: the error from the last failed attempt once every attempt
            has failed; ``RuntimeError`` if no attempt was made at all.
        """
        last_err = None
        for _ in range(0, trys):
            try:
                if self.get_app(app_id):
                    return self.delete_app(app_id)
                return None
            except Exception as err:
                # Keep the error: a bare ``raise`` after the loop has no
                # active exception on Python 3.
                last_err = err
                pprint("<p>Error: %s</p>" % sys.exc_info()[0])
                time.sleep(10)
        if last_err is None:
            raise RuntimeError(
                "delete_app_ifexisting made no attempt (trys=%r)" % (trys,))
        raise last_err

    @staticmethod
    def is_valid_app_id(app_id):
        """Return True when ``app_id`` only uses the accepted characters.

        Accepted: ASCII letters (either case), digits, hyphen, slash, dot.
        The empty string is accepted as well.
        """
        # NOTE(review): the log message in create_app says "lowercase" but
        # this pattern also accepts upper-case letters - confirm the intent.
        return bool(re.match(r"^[A-Za-z0-9-/.]*$", app_id))

    def create_app(self, app_id, attr):
        """
            Create and start an app.
            :param app_id: (str) - Application ID
            :param attr: marathon.models.app.MarathonApp application to create.
            :return: the created app
            :raises Exception: when ``app_id`` fails validation.
            :raises marathon.exceptions.MarathonHttpError: immediately for
                errors other than "locked by deployments", or after 10
                attempts when the app stays locked.
        """
        # Validate that app_id conforms to allowed naming scheme.
        if not self.is_valid_app_id(app_id):
            l.error("Error: Only lowercase letters, digits, hyphens are allowed in app_id. %s" % app_id)
            raise Exception("Invalid app_id")
        last_err = None
        for _ in range(0, 10):
            try:
                return self.mcli.create_app(app_id, attr)
            except marathon.exceptions.MarathonHttpError as e:
                if 'App is locked by one or more deployments. Override with the option' not in str(e):
                    raise
                # Locked by a running deployment: wait and try again.
                last_err = e
                time.sleep(1)
        # Re-raise the last "locked" error explicitly; a bare ``raise`` here
        # would fail with "No active exception" on Python 3.
        raise last_err

    def wait_app_removal(self, app):
        """Poll every 0.2 seconds until ``get_app`` no longer returns the app."""
        cnt = 0
        while True:
            if not self.get_app(app):
                break
            time.sleep(0.2)
            cnt += 1
            if cnt > 0:
                l.info("Stuck waiting for %s to be deleted CNT=%d" % (app, cnt))
        return True

    def wait_app_ready(self, app, running_count):
        """Poll once a second until ``app`` has ``running_count`` running tasks.

        :return: the app object from the poll that matched.
        """
        cnt = 0
        while True:
            a1 = self.get_app(app)
            if a1.tasks_running == running_count:
                return a1
            cnt += 1
            time.sleep(1)
            if (cnt % 30) == 29:
                l.info("[%d]Waiting for task to move to running stage, " % cnt +
                       "current stat staged=%d running=%d expected Running=%d" %
                       (a1.tasks_staged, a1.tasks_running, running_count))

    def scale_app(self, app, scale):
        return self.mcli.scale_app(app, scale)

    def ping(self):
        return self.mcli.ping()
def get_apps_dict(self):
    """Build ``{app id: self.get_app_dict(app id)}`` for every listed app."""
    by_id = {}
    listed = MarathonClient.list_apps(self)
    for entry in listed:
        by_id[entry.id] = self.get_app_dict(entry.id)
    return by_id
class MarathonAppCollector(object):
    """Collector yielding one ``GaugeMetricFamily`` per Marathon attribute.

    App attributes come from ``list_apps(embed_task_stats=True)`` and queue
    attributes from ``list_queue()``. Dotted names are resolved attribute by
    attribute after converting each part with ``to_snake_case``.
    """

    # Dotted attribute paths read from every app (name kept as-is, including
    # its spelling, because it is part of the class' public surface).
    APP_ATTIBUTES = (
        "instances",
        "cpus",
        "mem",
        "disk",
        "backoffSeconds",
        "backoffFactor",
        "maxLaunchDelaySeconds",
        "container.docker.privileged",
        "container.docker.forcePullImage",
        "healthChecks.gracePeriodSeconds",
        "healthChecks.intervalSeconds",
        "healthChecks.maxConsecutiveFailures",
        "healthChecks.timeoutSeconds",
        "upgradeStrategy.minimumHealthCapacity",
        "upgradeStrategy.maximumOverCapacity",
        "tasksStaged",
        "tasksRunning",
        "tasksHealthy",
        "tasksUnhealthy",
        "taskStats.startedAfterLastScaling.stats.counts.staged",
        "taskStats.startedAfterLastScaling.stats.counts.running",
        "taskStats.startedAfterLastScaling.stats.counts.healthy",
        "taskStats.startedAfterLastScaling.stats.lifeTime.averageSeconds",
        "taskStats.startedAfterLastScaling.stats.lifeTime.medianSeconds",
        "taskStats.withLatestConfig.stats.counts.staged",
        "taskStats.withLatestConfig.stats.counts.running",
        "taskStats.withLatestConfig.stats.counts.healthy",
        "taskStats.withLatestConfig.stats.lifeTime.averageSeconds",
        "taskStats.withLatestConfig.stats.lifeTime.medianSeconds",
        "taskStats.totalSummary.stats.counts.staged",
        "taskStats.totalSummary.stats.counts.running",
        "taskStats.totalSummary.stats.counts.healthy",
        "taskStats.totalSummary.stats.lifeTime.averageSeconds",
        "taskStats.totalSummary.stats.lifeTime.medianSeconds",
    )
    # Dotted attribute paths read from every queue item.
    QUEUE_ATTRIBUTES = (
        "count",
        "delay.overdue",
        "delay.timeLeftSeconds",
    )

    def __init__(self, marathon_url=None):
        self.client = MarathonClient(marathon_url)

    def collect(self):
        """Yield one gauge family per app attribute, then per queue attribute.

        Objects for which an attribute resolves to ``None`` are left out of
        that attribute's family. The queue is only fetched after all app
        families have been yielded.
        """
        apps = self.client.list_apps(embed_task_stats=True)
        for app_attribute in self.APP_ATTIBUTES:
            metric_family = GaugeMetricFamily(
                self.get_metric_key(app_attribute, 'apps'),
                documentation='from v2/apps?embed=apps.taskStats value of %s' %
                app_attribute,
                labels=["id"])
            for app in apps:
                labels = [app.id]
                value = self.get_metric_value(app_attribute, app)
                if value is None:
                    continue
                metric_family.add_metric(labels, value)
            yield metric_family

        queue = self.client.list_queue()
        for queue_attribute in self.QUEUE_ATTRIBUTES:
            metric_family = GaugeMetricFamily(
                self.get_metric_key(queue_attribute, 'queue'),
                documentation='from v2/queue value of %s' % queue_attribute,
                labels=["id"])
            for queue_item in queue:
                labels = [queue_item.app.id]
                value = self.get_metric_value(queue_attribute, queue_item)
                if value is None:
                    continue
                metric_family.add_metric(labels, value)
            yield metric_family

    @classmethod
    def get_metric_value(cls, key, obj):
        """Resolve the dotted ``key`` on ``obj``; ``None`` if any part is missing."""
        if '.' in key:
            key_current, key_rest = key.split('.', 1)
            sub_obj = getattr(obj, to_snake_case(key_current), None)
            if sub_obj is None:
                return None
            return cls.get_metric_value(key_rest, sub_obj)
        return getattr(obj, to_snake_case(key), None)

    @classmethod
    def get_metric_key(cls, key, obj_type):
        """Return ``marathon_<obj_type>_<key>`` with dots turned into underscores."""
        return "marathon_%s_%s" % (obj_type, key.replace('.', '_'))

    @classmethod
    def generate_metric(cls, key, obj, obj_type, labels, value):
        """Build a single-sample gauge family for ``key``.

        NOTE(review): this method used to return an undefined name and so
        raised ``NameError`` on every call. The body below is the presumed
        intent, modelled on ``collect`` - confirm before relying on it.
        ``obj`` is accepted for signature compatibility and not used.
        """
        metric_family = GaugeMetricFamily(
            cls.get_metric_key(key, obj_type),
            documentation='from v2/%s value of %s' % (obj_type, key),
            labels=["id"])
        if value is not None:
            metric_family.add_metric(labels, value)
        return metric_family
parser.add_argument("-e", "--execute", help="Operation execute",
                    choices=['delete', 'create'], required=True)
parser.add_argument("-d", "--delete", help="Delete all applications",
                    action="store_true")
parser.add_argument("-c", "--concurrency", help="Concurrency")
parser.add_argument("-n", "--nodes", help="Number of tasks per application")
parser.add_argument("-s", "--silent", help="Print only results",
                    action="store_true")
args = parser.parse_args()

# One client for both operations. The delete branch used to build a second
# client without the timeout, silently dropping the 240 second setting.
cluster = MarathonClient(args.marathon, timeout=240)

if args.execute == "delete":
    # Force-delete every application currently known to Marathon.
    for app in cluster.list_apps():
        print("Delete {}".format(app.id))
        cluster.delete_app(app.id, force=True)
elif args.execute == "create":
    # Both options are optional strings; fall back to 1 when not given.
    concur = 1 if args.concurrency is None else args.concurrency
    nodes = 1 if args.nodes is None else args.nodes
    concur_create_apps(int(concur), int(nodes))
# Command-line entry: delete every Marathon application or create new ones.
parser = argparse.ArgumentParser()
parser.add_argument("-m", "--marathon",
                    help="Marathon URL, on example "
                         "http://127.0.0.1:8080/marathon",
                    required=True)
parser.add_argument("-e", "--execute", help="Operation execute",
                    choices=['delete', 'create'], required=True)
parser.add_argument("-d", "--delete", help="Delete all applications",
                    action="store_true")
parser.add_argument("-c", "--concurrency", help="Concurrency")
parser.add_argument("-n", "--nodes", help="Number of tasks per application")
parser.add_argument("-s", "--silent", help="Print only results",
                    action="store_true")
args = parser.parse_args()

# One client for both operations. The delete branch used to build a second
# client without the timeout, silently dropping the 240 second setting.
cluster = MarathonClient(args.marathon, timeout=240)

if args.execute == "delete":
    # Force-delete every application currently known to Marathon.
    for app in cluster.list_apps():
        print("Delete {}".format(app.id))
        cluster.delete_app(app.id, force=True)
elif args.execute == "create":
    # Both options are optional strings; fall back to 1 when not given.
    concur = 1 if args.concurrency is None else args.concurrency
    nodes = 1 if args.nodes is None else args.nodes
    concur_create_apps(int(concur), int(nodes))
def ajax_list_apps(request):
    """Render the AJAX app list fragment with the apps reported by Marathon.

    The Marathon endpoint is built from ``settings.MARATHON`` (``host`` and
    ``port``).
    """
    endpoint = "http://{}:{}".format(settings.MARATHON["host"],
                                     settings.MARATHON["port"])
    client = MarathonClient(endpoint)
    context = {"apps": client.list_apps()}
    return render(request, "marathon_mgmt/ajax_list_apps.html", context)
# res = maraclient.kill_tasks(i,scale=True) # except Exception as e: # print("出错了,错误代码%s" %e) # print("应用%s已经停止或者不存在" %i) # else: # print("应用%s停止成功"%i) # version:v3 from marathon import MarathonClient server = "http://10.30.0.6:8080" #实例化 maraclient = MarathonClient(servers=server) # 获取marathon的应用模块列表 allapp = maraclient.list_apps() # 提取应用模块的名称,即id allapp_list = [str(i.id) for i in allapp] #print(allapp_list) # 读取待下线模块文本,一行一个,但是不带分组. with open(r"applist_offline.txt", encoding="utf-8", mode="r") as f: for i in f: i = i.rstrip("\n") for j in allapp_list: temp_name = j.split("/")[-1] # 此步骤就是为了构建完整的模块名称。 if i == temp_name: try: res = maraclient.kill_tasks(j, scale=True)