def launch_random():
    """Assign every DAG task to a randomly chosen worker node.

    Reads the application's ``app_config.yaml``, picks one non-home node at
    random for each entry of ``application.tasks.dag_tasks`` and writes the
    resulting ``{task_name: node_name}`` dictionary to ``mapping.json`` in
    the current working directory.

    Raises:
        ValueError: if there are DAG tasks to place but ``node_map`` holds no
            node other than home nodes.
    """
    app_config = app_config_parser.AppConfig(jupiter_config.get_abs_app_dir())
    app_name = app_config.app_name
    # NOTE(review): the original author questioned this path ("is this
    # correct?"). AppConfig above is built from get_abs_app_dir() alone,
    # while here app_name is appended as well -- confirm the directory layout.
    infile = (jupiter_config.get_abs_app_dir() + '/' + app_name +
              "/app_config.yaml")
    outfile = "mapping.json"

    with open(infile, 'r') as f:
        app_conf = yaml.safe_load(f)

    dag_tasks = app_conf['application']['tasks']['dag_tasks']
    tasks_names = [task['name'] for task in dag_tasks]

    # Exclude home nodes with the same `startswith('home')` rule the other
    # launchers in this file apply; unlike list.remove('home') this does not
    # raise when no node is literally called 'home'.
    nodes_names = [
        node for node in app_conf['node_map'] if not node.startswith('home')
    ]
    if tasks_names and not nodes_names:
        raise ValueError(
            "node_map in app_config.yaml has no worker node to map tasks to")

    output_mapping = {
        task_name: random.choice(nodes_names)
        for task_name in tasks_names
    }

    with open(outfile, "w") as f:
        json.dump(output_mapping, f, indent=4)
    log.info("Wrote mapping to file mapping.json. Ready to launch CIRCE.")
# NOTE(review): the statements down to t2.join() are the tail of a definition
# whose header lies above this chunk; they are reproduced unchanged and their
# original nesting cannot be recovered from here.
shutil.copy(src, dst)

# build in parallel
t1 = threading.Thread(target=build_push_home,
                      args=(app_config.get_exec_home_tag(), ))
t2 = threading.Thread(target=build_push_worker,
                      args=(app_config.get_exec_worker_tag(), ))
t1.start()
t2.start()
t1.join()
t2.join()


if __name__ == '__main__':
    # Resolve the application directory: an explicit app name on the command
    # line wins, otherwise fall back to jupiter_config.py.
    if len(sys.argv) == 2:
        app_dir = "../app_specific_files/{}".format(sys.argv[1])
        log.info("Setting app directory to: {}".format(app_dir))
    # `elif`, not `if`: with a second independent `if`, the one-argument case
    # above fell through into the `else` branch and always exited.
    elif len(sys.argv) == 1:
        log.info("Defaulting to jupiter_config.py to set app directory.")
        app_dir = jupiter_config.get_abs_app_dir()
        log.info("Setting app directory to: {}".format(app_dir))
    else:
        log.error("Please insert application name (same name as the app " +
                  "directory under ${JUPITER_ROOT}/app_specific_files/")
        log.error("usage: python build_push_exec.py {APP_NAME}")
        # Usage error: report failure to the calling shell.
        sys.exit(1)
    main(app_dir)
def launch_wave():
    """Deploy the WAVE mapper on k8s and save the mapping it produces.

    Steps, in order:
      1. create the ``<namespace_prefix>-mapper`` namespace;
      2. create a k8s service for the home node and one per worker node;
      3. create one deployment per worker node and wait until
         ``check_workers_running`` no longer returns ``False``;
      4. create the home deployment;
      5. poll the home service through the kubectl proxy until it returns a
         mapping, then write that mapping to ``mapping.json``.

    The process exits with status 1 if the home service cannot be read or a
    worker service cannot be created/read. The proxy process is killed on
    every exit path.
    """
    # Parse app's app_config.yaml
    app_config = app_config_parser.AppConfig(jupiter_config.get_abs_app_dir())
    namespace = app_config.namespace_prefix() + "-mapper"
    os.system(f"kubectl create namespace {namespace}")

    # Load kube config before executing k8s client API calls.
    config.load_kube_config(config_file=jupiter_config.get_kubeconfig())

    k8s_apps_v1 = client.AppsV1Api()
    core_v1_api = client.CoreV1Api()

    exec_prof_home_ip = lookup_home_ip("-exec", app_config, core_v1_api)
    drupe_home_ip = lookup_home_ip("-profiler", app_config, core_v1_api)

    # Create the k8s service for the home task. A service only exposes ports
    # inside the cluster; it does not launch any pod.
    home_svc_name = app_config.app_name + "-home"
    home_svc_spec = k8s_spec.service.generate(
        name=home_svc_name,
        port_mappings=jupiter_config.k8s_service_port_mappings())
    resp = core_v1_api.create_namespaced_service(namespace, home_svc_spec)
    log.debug("Home service created. status = '%s'" % str(resp.status))

    try:
        resp = core_v1_api.read_namespaced_service(home_svc_name, namespace)
    except ApiException:
        log.error("Unable to read namespaced service")
        sys.exit(1)
    home_node_ip = resp.spec.cluster_ip

    # One service per worker node; remember names and cluster IPs so they can
    # be injected into every pod's environment.
    all_workers_ips = []
    all_workers_names = []
    for node in app_config.node_map():
        if node.startswith('home'):
            # skip scheduling tasks on the home node
            continue
        pod_name = app_config.app_name + '-' + node
        spec = k8s_spec.service.generate(
            name=pod_name,
            port_mappings=jupiter_config.k8s_service_port_mappings())
        try:
            resp = core_v1_api.create_namespaced_service(namespace, spec)
            log.debug("Service created. status = '%s'" % str(resp.status))
            resp = core_v1_api.read_namespaced_service(pod_name, namespace)
        except ApiException:
            log.error("Unable to create service for {}".format(pod_name))
            sys.exit(1)
        all_workers_ips.append(resp.spec.cluster_ip)
        all_workers_names.append(node)
    all_workers_ips = ':'.join(all_workers_ips)
    all_workers_names = ':'.join(all_workers_names)

    # Hoisted out of the loop below: the arguments do not change between
    # iterations. NOTE(review): assumes the helper returns the same value for
    # every worker -- confirm against its definition.
    drupe_worker_ips = drupe_worker_names_to_ips(app_config, core_v1_api)

    for node, host in app_config.node_map().items():
        if node.startswith('home'):
            # do not deploy pods on home yet. will be done afterwards.
            continue
        pod_name = app_config.app_name + '-' + node
        spec = k8s_spec.deployment.generate(
            name=pod_name,
            label=pod_name,
            image=app_config.get_wave_worker_tag(),
            host=host,
            port_mappings=jupiter_config.k8s_deployment_port_mappings(),
            # inject any arbitrary environment variables here
            env_vars={
                "NODE_NAME": node,
                "HOME_NODE_IP": home_node_ip,
                "DRUPE_WORKER_IPS": drupe_worker_ips,
                "WORKER_NODE_NAMES": all_workers_names,
                "WORKER_NODE_IPS": all_workers_ips,
                "EXEC_PROF_HOME_IP": exec_prof_home_ip,
                "DRUPE_HOME_IP": drupe_home_ip,
            })
        # Call the Kubernetes API to create the deployment
        resp = k8s_apps_v1.create_namespaced_deployment(body=spec,
                                                        namespace=namespace)
        log.debug("Deployment created. status ='%s'" % str(resp.status))

    # check if worker deployment pods are running
    while check_workers_running(app_config, namespace) is False:
        log.debug("WAVE worker pods still deploying, waiting...")
        time.sleep(30)

    home_depl_spec = k8s_spec.deployment.generate(
        name=app_config.app_name + "-home",
        label=app_config.app_name + "-home",
        image=app_config.get_wave_home_tag(),
        host=app_config.home_host(),
        port_mappings=jupiter_config.k8s_deployment_port_mappings(),
        env_vars={
            "NODE_NAME": "home",
            "WORKER_NODE_NAMES": all_workers_names,
            "WORKER_NODE_IPS": all_workers_ips,
            "DRUPE_WORKER_IPS": drupe_worker_names_to_ips(app_config,
                                                          core_v1_api),
            "EXEC_PROF_HOME_IP": exec_prof_home_ip,
            "FIRST_TASK": app_config.get_first_task(),
            "DRUPE_HOME_IP": drupe_home_ip
        })
    resp = k8s_apps_v1.create_namespaced_deployment(body=home_depl_spec,
                                                    namespace=namespace)
    log.debug("WAVE home deployment created. status = '%s'" % str(resp.status))
    log.info('Successfully deployed WAVE')

    # Setup k8s proxy and retrieve mapping from WAVE home pod
    proxy_proc = setup_proxy(jupiter_config.kubectl_proxy_mapper())
    try:
        svc_port, _ = jupiter_config.flask_port_mapping()
        # NOTE(review): port 8081 is hard-coded here while the proxy is set up
        # from kubectl_proxy_mapper() -- confirm the two always agree.
        url = f"http://localhost:{8081}/api/v1/" \
            + f"namespaces/{namespace}/services/{app_config.app_name}-home:{svc_port}/proxy"
        log.info("Waiting for WAVE pod to boot...")
        log.info(
            f"namespaces/{namespace}/services/{app_config.app_name}-home:{svc_port}/proxy"
        )
        time.sleep(10)
        while True:
            try:
                log.debug('Trying to get the assignment from WAVE mapper')
                r = requests.get(url)
                mapping = json.dumps(r.json(), indent=4)
                log.info(f"mapping:\n{mapping}")
                # "{}" / "[]" are two characters long: anything longer that
                # carries no "status" text is taken as the final mapping.
                if len(mapping) > 2 and "status" not in mapping:
                    break
            except Exception as err:
                # Best-effort polling: the pod may not be serving yet.
                # `Exception` (not a bare except) lets Ctrl-C and SystemExit
                # propagate instead of being retried forever.
                log.debug(f"WAVE mapper request failed: {err}")
            log.debug("WAVE not finished, retry in 30 sec...")
            time.sleep(30)
        with open("mapping.json", 'w') as f:
            f.write(mapping)
        log.info("Wrote mapping to file mapping.json. Ready to launch CIRCE.")
    finally:
        # TODO: print message talking about killing proxy
        proxy_proc.kill()
def launch_heft():
    """Deploy the HEFT mapper (a single home pod) and save its mapping.

    Creates the ``<namespace_prefix>-mapper`` namespace, a service and a
    deployment for the home pod, then polls the home service through the
    kubectl proxy until it returns a non-empty mapping without "status" in
    it, and writes that mapping to ``mapping.json``.

    The process exits with status 1 if the home service cannot be read. The
    proxy process is killed on every exit path.
    """
    # Parse app's app_config.yaml
    app_config = app_config_parser.AppConfig(jupiter_config.get_abs_app_dir())
    namespace = app_config.namespace_prefix() + "-mapper"
    app_name = app_config.app_name
    os.system(f"kubectl create namespace {namespace}")

    # Load kube config before executing k8s client API calls.
    config.load_kube_config(config_file=jupiter_config.get_kubeconfig())

    k8s_apps_v1 = client.AppsV1Api()
    core_v1_api = client.CoreV1Api()

    # Looked up once, before anything is created (the original repeated this
    # exact call a second time further down and discarded the first result).
    exec_prof_home_ip = lookup_home_ip("-exec", app_config, core_v1_api)

    log.info('Starting to deploy HEFT (a single home pod)')
    home_svc_name = app_name + "-home"
    spec = k8s_spec.service.generate(
        name=home_svc_name,
        port_mappings=jupiter_config.k8s_service_port_mappings())
    resp = core_v1_api.create_namespaced_service(namespace, spec)
    log.debug("Home service created. status = '%s'" % str(resp.status))

    try:
        resp = core_v1_api.read_namespaced_service(home_svc_name, namespace)
    except ApiException:
        log.error("Unable to read namespaced service")
        sys.exit(1)

    drupe_home_ip = lookup_home_ip("-profiler", app_config, core_v1_api)

    home_depl_spec = k8s_spec.deployment.generate(
        name=app_name + "-home",
        label=app_name + "-home",
        image=app_config.get_mapper_tag(),
        host=app_config.home_host(),
        port_mappings=jupiter_config.k8s_deployment_port_mappings(),
        env_vars={
            "NODE_NAME": "home",
            "HOME_NODE_IP": resp.spec.cluster_ip,
            "DRUPE_WORKER_IPS": drupe_worker_names_to_ips(app_config,
                                                          core_v1_api),
            "WORKER_NODE_NAMES": concat_worker_names(app_config),
            "EXEC_PROF_HOME_IP": exec_prof_home_ip,
            "DRUPE_HOME_IP": drupe_home_ip,
            "TASK_MAPPER": app_config.task_mapper(),
        })
    resp = k8s_apps_v1.create_namespaced_deployment(body=home_depl_spec,
                                                    namespace=namespace)
    log.debug("HEFT home deployment created. status = '%s'" % str(resp.status))
    log.info('Successfully deployed HEFT')

    # Setup k8s proxy and retrieve mapping from HEFT pod
    proxy_proc = setup_proxy(jupiter_config.kubectl_proxy_mapper())
    try:
        svc_port, _ = jupiter_config.flask_port_mapping()
        # NOTE(review): port 8081 is hard-coded here while the proxy is set up
        # from kubectl_proxy_mapper() -- confirm the two always agree.
        url = f"http://localhost:{8081}/api/v1/" \
            + f"namespaces/{namespace}/services/{app_name}-home:{svc_port}/proxy"
        log.info("Waiting for HEFT pod to boot...")
        time.sleep(10)
        while True:
            try:
                log.debug('Trying to get the assignment from HEFT mapper')
                r = requests.get(url)
                mapping = json.dumps(r.json(), indent=4)
                log.info(f"mapping:\n{mapping}")
                # json.dumps never returns an empty string, so the former
                # `len(mapping) != 0` test was always true and let an empty
                # "{}" through. `> 2` rejects "{}" / "[]", matching the check
                # launch_wave uses.
                if len(mapping) > 2 and "status" not in mapping:
                    break
            except Exception as err:
                # Best-effort polling: the pod may not be serving yet.
                # `Exception` (not a bare except) lets Ctrl-C and SystemExit
                # propagate instead of being retried forever.
                log.debug(f"HEFT mapper request failed: {err}")
            log.debug("HEFT not finished, retry in 30 sec...")
            time.sleep(30)
        with open("mapping.json", 'w') as f:
            f.write(mapping)
        log.info("Wrote mapping to file mapping.json. Ready to launch CIRCE.")
    finally:
        # TODO: print message talking about killing proxy
        proxy_proc.kill()
# NOTE(review): the statements down to the log.info() call continue a
# definition whose header lies above this chunk (they read `f` and `outfile`
# from it); they are reproduced unchanged and their original nesting cannot
# be recovered from here.
app_conf = yaml.safe_load(f)
dag_tasks = app_conf['application']['tasks']['dag_tasks']
tasks_names = []
for task in dag_tasks:
    tasks_names.append(task['name'])
nodes_names = list(app_conf['node_map'].keys())
nodes_names.remove('home')
output_mapping = {}
for task_name in tasks_names:
    idx = random.randint(0, len(nodes_names) - 1)
    output_mapping[task_name] = nodes_names[idx]
with open(outfile, "w") as f:
    json.dump(output_mapping, f, indent=4)
log.info("Wrote mapping to file mapping.json. Ready to launch CIRCE.")


if __name__ == '__main__':
    # Dispatch on the task mapper named in the app's app_config.yaml.
    app_config = app_config_parser.AppConfig(jupiter_config.get_abs_app_dir())
    mapper_type = app_config.task_mapper().strip()
    if mapper_type in ("heft", "heft_duplicate", "heft_balanced",
                       "heft_dup_no_comm_cost"):
        launch_heft()
    elif mapper_type == "wave":
        launch_wave()
    # Compare the stripped value like the branches above; the raw
    # task_mapper() string would miss "random" written with stray whitespace.
    elif mapper_type == "random":
        launch_random()
    else:
        log.error("Unrecognized mapper in app_config.yaml")
def _parse_app_port_mappings(app_config):
    """Return the app's extra port mappings as ``(idx, svc_port, container_port)``.

    ``idx`` is the position of the entry in ``app_config.port_mappings()``.
    Entries that are not of the form ``"<int>:<int>"`` are reported and
    skipped. Any failure to obtain the mappings at all is treated as "the
    application declares none" and yields an empty list.
    """
    try:
        raw_mappings = list(app_config.port_mappings())
    except Exception:
        # Best-effort: the mappings are optional in the app config.
        log.debug('No application port mappings')
        return []

    parsed = []
    for idx, mapping in enumerate(raw_mappings):
        try:
            svc, docker = mapping.split(':')
            parsed.append((idx, int(svc), int(docker)))
        except (AttributeError, ValueError):
            log.error(f"Ignoring malformed port mapping '{mapping}' "
                      "(expected 'service_port:container_port')")
    return parsed


def launch_circe(task_mapping):
    """Deploy CIRCE on k8s according to ``task_mapping``.

    Creates the ``<namespace_prefix>-circe`` namespace, the services for the
    home task, the DAG tasks and the non-DAG tasks, then the DAG task
    deployments, the non-DAG task deployments and finally the home
    deployment, waiting for each group of workers to be running before
    moving on.

    Args:
        task_mapping: mapping from DAG task name to node name (e.g. loaded
            from "mapping.json"). Node names are translated to k8s hostnames
            through ``app_config.node_map()``.

    The process exits with status 1 if the home service cannot be read, or
    if a DAG task is missing from ``task_mapping`` or maps to a node that is
    not in the node map.
    """
    # Parse app's app_config.yaml
    app_config = app_config_parser.AppConfig(jupiter_config.get_abs_app_dir())
    namespace = app_config.namespace_prefix() + "-circe"
    os.system(f"kubectl create namespace {namespace}")

    # Load kube config before executing k8s client API calls.
    config.load_kube_config(config_file=jupiter_config.get_kubeconfig())
    api = client.CoreV1Api()
    k8s_apps_v1 = client.AppsV1Api()

    # Compile port mappings for k8s services and deployments: Jupiter's own
    # plus any the application declares. The application's list is parsed
    # once so both kinds of mapping always describe the same set of ports.
    svc_port_mappings = jupiter_config.k8s_service_port_mappings()
    depl_port_mappings = jupiter_config.k8s_deployment_port_mappings()
    for idx, svc_port, container_port in _parse_app_port_mappings(app_config):
        svc_port_mappings.append({
            "name": f"custom{idx}",
            "port": svc_port,
            "targetPort": container_port
        })
        depl_port_mappings.append({
            "name": f"custom{idx}",
            "containerPort": container_port
        })

    # *** Create Home Task Service ***
    home_svc_name = app_config.app_name + "-home"
    home_svc_spec = k8s_spec.service.generate(name=home_svc_name,
                                              port_mappings=svc_port_mappings)
    resp = api.create_namespaced_service(namespace, home_svc_spec)
    log.debug("Home service created. status = '%s'" % str(resp.status))

    try:
        resp = api.read_namespaced_service(home_svc_name, namespace)
    except ApiException:
        log.error("Unable to read namespaced service")
        sys.exit(1)
    home_task_ip = resp.spec.cluster_ip

    # *** Create DAG Task Services ***
    task_to_ip_string = create_services(app_config.app_name, namespace,
                                        app_config.get_dag_tasks(), api,
                                        svc_port_mappings)

    # *** Create Non-DAG Task Services ***
    nondag_task_to_ip_string = create_services(app_config.app_name, namespace,
                                               app_config.get_nondag_tasks(),
                                               api, svc_port_mappings)

    # *** Create DAG Task Deployments ***
    # Each DAG task to be launched on nodes designated by task_mapping
    # (e.g., derived from "mapping.json" file). Node names in task_mapping will
    # be mapped to the k8s hostname as indicated in app_config.yaml.
    node_map = app_config.node_map()
    for task in app_config.get_dag_tasks():
        try:
            node = task_mapping[task['name']]
            k8s_hostname = node_map[node]
        except KeyError:
            log.fatal("Task missing in mapping file or node not in " +
                      "app_config.yaml. Clean up with delete_all_circe.py.")
            # Non-zero status so callers can tell the deployment failed.
            sys.exit(1)

        pod_name = app_config.app_name + '-' + task['name']
        spec = k8s_spec.deployment.generate(
            name=pod_name,
            label=pod_name,
            image=app_config.get_circe_tag(),
            host=k8s_hostname,
            port_mappings=depl_port_mappings,
            # inject any arbitrary environment variables here
            env_vars={
                "MY_TASK_NAME": task['name'],
                "CIRCE_HOME_IP": home_task_ip,
                "CIRCE_TASK_TO_IP": task_to_ip_string,
                "CIRCE_NONDAG_TASK_TO_IP": nondag_task_to_ip_string,
            })
        resp = k8s_apps_v1.create_namespaced_deployment(body=spec,
                                                        namespace=namespace)
        log.debug(f"DAG task deployment created. status={resp.status}")

    while check_dag_workers_running(app_config, namespace) is False:
        log.debug("CIRCE dag worker pods still deploying, waiting...")
        time.sleep(30)

    # *** Create Non-DAG Task Deployments ***
    for nondag_task in app_config.get_nondag_tasks():
        pod_name = app_config.app_name + '-' + nondag_task['name']
        spec = k8s_spec.deployment.generate(
            name=pod_name,
            label=pod_name,
            image=app_config.get_circe_tag(),
            host=nondag_task['k8s_host'],
            port_mappings=depl_port_mappings,
            # inject any arbitrary environment variables here
            env_vars={
                "MY_TASK_NAME": nondag_task['name'],
                "CIRCE_HOME_IP": home_task_ip,
                "CIRCE_TASK_TO_IP": task_to_ip_string,
                "CIRCE_NONDAG_TASK_TO_IP": nondag_task_to_ip_string,
            })
        resp = k8s_apps_v1.create_namespaced_deployment(body=spec,
                                                        namespace=namespace)
        log.debug(f"Non-DAG task depl. created. status={resp.status}")

    while check_nondag_workers_running(app_config, namespace) is False:
        log.debug("CIRCE nondag worker pods still deploying, waiting...")
        time.sleep(30)

    # *** Create Home Task Deployment ***
    home_depl_spec = k8s_spec.deployment.generate(
        name=app_config.app_name + "-home",
        label=app_config.app_name + "-home",
        image=app_config.get_circe_tag(),
        host=app_config.home_host(),
        port_mappings=depl_port_mappings,
        env_vars={
            "MY_TASK_NAME": "home",
            "CIRCE_HOME_IP": home_task_ip,
            "CIRCE_TASK_TO_IP": task_to_ip_string,
            "CIRCE_NONDAG_TASK_TO_IP": nondag_task_to_ip_string,
        })
    resp = k8s_apps_v1.create_namespaced_deployment(body=home_depl_spec,
                                                    namespace=namespace)
    log.debug(f"Home deployment created. status={resp.status}")

    log.info('CIRCE successfully deployed')
def main():
    """Launch the DRUPE profiler on k8s.

    Services are created first (home, then one per non-home node), followed
    by the worker deployments, a wait until ``check_workers_running`` stops
    returning ``False``, and finally the home deployment.

    Returns:
        dict mapping every node name (``'home'`` included) to the cluster IP
        of its profiler service.
    """
    # Parse app's app_config.yaml
    app_config = app_config_parser.AppConfig(jupiter_config.get_abs_app_dir())
    namespace = app_config.namespace_prefix() + "-profiler"
    os.system(f"kubectl create namespace {namespace}")

    # Load kube config before executing k8s client API calls.
    config.load_kube_config(config_file=jupiter_config.get_kubeconfig())
    api = client.CoreV1Api()
    k8s_apps_v1 = client.AppsV1Api()

    # node name -> cluster IP of that node's profiler service
    all_profiler_map = {}

    # --- home service ---
    home_name = app_config.app_name + "-home"
    resp = api.create_namespaced_service(
        namespace,
        k8s_spec.service.generate(
            name=home_name,
            port_mappings=jupiter_config.k8s_service_port_mappings()))
    log.debug("Home service created. status = '%s'" % str(resp.status))

    try:
        resp = api.read_namespaced_service(home_name, namespace)
    except ApiException:
        log.error("Unable to read namespaced service")
        sys.exit(1)

    home_node_ip = resp.spec.cluster_ip
    all_profiler_map['home'] = resp.spec.cluster_ip

    logging.debug('Home Profilers were created successfully!')

    # --- one service per non-home node ---
    worker_ips = []
    worker_names = []
    for node in app_config.node_map():
        if not node.startswith('home'):
            pod_name = app_config.app_name + '-' + node
            svc_spec = k8s_spec.service.generate(
                name=pod_name,
                port_mappings=jupiter_config.k8s_service_port_mappings())
            try:
                resp = api.create_namespaced_service(namespace, svc_spec)
                log.debug("Service created. status = '%s'" % str(resp.status))
                resp = api.read_namespaced_service(pod_name, namespace)
            except ApiException:
                log.error("Unable to create service for {}".format(pod_name))
                sys.exit(1)

            worker_ips.append(resp.spec.cluster_ip)
            worker_names.append(node)
            all_profiler_map[node] = resp.spec.cluster_ip

    # Colon-separated lists injected into every pod's environment.
    all_profiler_ips = ':'.join(worker_ips)
    all_profiler_names = ':'.join(worker_names)
    logging.debug('Worker Profilers were created successfully!')

    # --- worker deployments (home is deployed last, further down) ---
    for node, host in app_config.node_map().items():
        if not node.startswith('home'):
            pod_name = app_config.app_name + '-' + node
            worker_env = {
                "NODE_NAME": node,
                "HOME_NODE_IP": home_node_ip,
                "ALL_NODE_IPS": all_profiler_ips,
                "ALL_NODE_NAMES": all_profiler_names,
                "NODE_IP": all_profiler_map[node]
            }
            depl_spec = k8s_spec.deployment.generate(
                name=pod_name,
                label=pod_name,
                image=app_config.get_drupe_worker_tag(),
                host=host,
                port_mappings=jupiter_config.k8s_deployment_port_mappings(),
                env_vars=worker_env)
            # Call the Kubernetes API to create the deployment
            resp = k8s_apps_v1.create_namespaced_deployment(
                body=depl_spec, namespace=namespace)
            log.debug("Deployment created. status ='%s'" % str(resp.status))

    # Block until the worker pods are reported as running.
    while True:
        if check_workers_running(app_config, namespace) is not False:
            break
        log.debug("DRUPE profiler worker pods still deploying, waiting...")
        time.sleep(30)

    # --- home deployment ---
    home_env = {
        "NODE_NAME": "home",
        "HOME_NODE_IP": home_node_ip,
        "ALL_NODE_IPS": all_profiler_ips,
        "ALL_NODE_NAMES": all_profiler_names,
        "NODE_IP": all_profiler_map["home"]
    }
    home_depl_spec = k8s_spec.deployment.generate(
        name=app_config.app_name + "-home",
        label=app_config.app_name + "-home",
        image=app_config.get_drupe_home_tag(),
        host=app_config.home_host(),
        port_mappings=jupiter_config.k8s_deployment_port_mappings(),
        env_vars=home_env)
    resp = k8s_apps_v1.create_namespaced_deployment(body=home_depl_spec,
                                                    namespace=namespace)
    log.debug("Home deployment created. status = '%s'" % str(resp.status))

    pprint(all_profiler_map)
    logging.debug('Successfully deploy DRUPE ')
    return all_profiler_map
def main():
    """Launch the execution profiler on k8s.

    Creates the ``<namespace_prefix>-exec`` namespace, a service for the home
    task and one per non-home node, then the worker deployments, waits until
    ``check_workers_running`` stops returning ``False`` and finally creates
    the home deployment. Exits with status 1 if the home service cannot be
    read or a worker service cannot be created/read.
    """
    # Parse app's app_config.yaml
    app_config = app_config_parser.AppConfig(jupiter_config.get_abs_app_dir())
    namespace = app_config.namespace_prefix() + "-exec"
    os.system(f"kubectl create namespace {namespace}")

    # Load kube config before executing k8s client API calls.
    config.load_kube_config(config_file=jupiter_config.get_kubeconfig())
    api = client.CoreV1Api()
    k8s_apps_v1 = client.AppsV1Api()

    # Service for the home task. A k8s service exposes pod ports to the whole
    # cluster; creating it does not launch any pod.
    home_name = app_config.app_name + "-home"
    resp = api.create_namespaced_service(
        namespace,
        k8s_spec.service.generate(
            name=home_name,
            port_mappings=jupiter_config.k8s_service_port_mappings()))
    log.debug("Home service created. status = '%s'" % str(resp.status))

    try:
        resp = api.read_namespaced_service(home_name, namespace)
    except ApiException:
        log.error("Unable to read namespaced service")
        sys.exit(1)

    home_node_ip = resp.spec.cluster_ip

    # Services for the profiler workers, one per non-home node of the node
    # map. Names and cluster IPs are collected for injection into the pods'
    # environment variables.
    worker_ips = []
    worker_names = []
    for node in app_config.node_map():
        if not node.startswith('home'):
            pod_name = app_config.app_name + '-' + node
            svc_spec = k8s_spec.service.generate(
                name=pod_name,
                port_mappings=jupiter_config.k8s_service_port_mappings())
            try:
                resp = api.create_namespaced_service(namespace, svc_spec)
                log.debug("Service created. status = '%s'" % str(resp.status))
                resp = api.read_namespaced_service(pod_name, namespace)
            except ApiException:
                log.error("Unable to create service for {}".format(pod_name))
                sys.exit(1)

            worker_ips.append(resp.spec.cluster_ip)
            worker_names.append(node)

    all_profiler_ips = ':'.join(worker_ips)
    all_profiler_names = ':'.join(worker_names)

    # Deployments for the workers; the home pod is deployed afterwards.
    for node, host in app_config.node_map().items():
        if not node.startswith('home'):
            pod_name = app_config.app_name + '-' + node
            worker_env = {
                "NODE_NAME": node,
                "HOME_NODE_IP": home_node_ip,
                "ALL_PROFILER_IPS": all_profiler_ips,
                "ALL_PROFILER_NAMES": all_profiler_names
            }
            depl_spec = k8s_spec.deployment.generate(
                name=pod_name,
                label=pod_name,
                image=app_config.get_exec_worker_tag(),
                host=host,
                port_mappings=jupiter_config.k8s_deployment_port_mappings(),
                env_vars=worker_env)
            # Call the Kubernetes API to create the deployment
            resp = k8s_apps_v1.create_namespaced_deployment(
                body=depl_spec, namespace=namespace)
            log.debug("Deployment created. status ='%s'" % str(resp.status))

    # Block until the worker pods are reported as running.
    while True:
        if check_workers_running(app_config, namespace) is not False:
            break
        log.debug("Execution profiler worker pods still deploying, waiting...")
        time.sleep(30)

    # Deployment for the home task.
    home_env = {
        "NODE_NAME": "home",
        "HOME_NODE_IP": home_node_ip,
        "ALL_PROFILER_IPS": all_profiler_ips,
        "ALL_PROFILER_NAMES": all_profiler_names
    }
    home_depl_spec = k8s_spec.deployment.generate(
        name=app_config.app_name + "-home",
        label=app_config.app_name + "-home",
        image=app_config.get_exec_home_tag(),
        host=app_config.home_host(),
        port_mappings=jupiter_config.k8s_deployment_port_mappings(),
        env_vars=home_env)
    resp = k8s_apps_v1.create_namespaced_deployment(body=home_depl_spec,
                                                    namespace=namespace)
    log.debug("Home deployment created. status = '%s'" % str(resp.status))

    log.info('Successfully deployed execution profiler.')