Exemplo n.º 1
0
 def _get_free_namespace(self):
     """Return a generated namespace name that the cluster does not know yet.

     Candidates are built as ``"gs-" + random_string(6)`` and looked up with
     ``self._core_api.read_namespace``. A lookup that raises
     ``K8SApiException`` with status 404 means the name is unused, so it is
     returned. A lookup that succeeds means the name is taken and a new
     candidate is tried.

     Returns:
         The first candidate whose lookup answered with status 404.

     Raises:
         RuntimeError: If the lookup fails with any status other than 404.
             The original API exception is attached as ``__cause__``.
     """
     while True:
         namespace = "gs-" + random_string(6)
         try:
             self._core_api.read_namespace(namespace)
         except K8SApiException as e:
             if e.status != 404:
                 # Chain explicitly so the API error is reported as the
                 # direct cause rather than as incidental context.
                 raise RuntimeError(str(e)) from e
             return namespace
Exemplo n.º 2
0
    def __init__(
        self,
        hosts=None,
        port=None,
        etcd_addrs=None,
        num_workers=None,
        vineyard_socket=None,
        timeout_seconds=None,
        vineyard_shared_mem=None,
        **kwargs
    ):
        """Record the launch settings and initialise the runtime state.

        Each named argument is stored unchanged on the instance under the
        same name prefixed with an underscore. Additional keyword arguments
        are accepted but not used by this constructor.
        """
        # Runtime state: a fresh instance id, no process handle, and the
        # instance starts out flagged as closed.
        self._instance_id = random_string(6)
        self._closed = True
        self._proc = None

        # Network-related settings.
        self._hosts = hosts
        self._port = port
        self._etcd_addrs = etcd_addrs

        # Sizing and timing settings.
        self._num_workers = num_workers
        self._timeout_seconds = timeout_seconds

        # Vineyard settings.
        self._vineyard_socket = vineyard_socket
        self._vineyard_shared_mem = vineyard_shared_mem
Exemplo n.º 3
0
    def __init__(
        self,
        api_client=None,
        k8s_namespace=None,
        k8s_service_type=None,
        num_workers=None,
        preemptive=None,
        k8s_gs_image=None,
        k8s_etcd_image=None,
        k8s_image_pull_policy=None,
        k8s_image_pull_secrets=None,
        k8s_vineyard_daemonset=None,
        k8s_vineyard_cpu=None,
        k8s_vineyard_mem=None,
        vineyard_shared_mem=None,
        k8s_engine_cpu=None,
        k8s_engine_mem=None,
        k8s_coordinator_cpu=None,
        k8s_coordinator_mem=None,
        etcd_addrs=None,
        k8s_etcd_num_pods=None,
        k8s_etcd_cpu=None,
        k8s_etcd_mem=None,
        k8s_mars_worker_cpu=None,
        k8s_mars_worker_mem=None,
        k8s_mars_scheduler_cpu=None,
        k8s_mars_scheduler_mem=None,
        with_mars=None,
        k8s_volumes=None,
        timeout_seconds=None,
        dangling_timeout_seconds=None,
        k8s_waiting_for_delete=None,
        mount_dataset=None,
        k8s_dataset_image=None,
        **kwargs
    ):
        """Create the Kubernetes API handles and per-instance resource names.

        The constructor arguments are captured as a dict in
        ``self._saved_locals`` instead of being copied to one attribute each.
        Only the namespace and the image pull secrets are read back out here.

        The pull secrets are normalised to a list (``None`` becomes ``[]``,
        any other non-list value is wrapped in a one-element list) and also
        kept as a comma-joined string in ``self._image_pull_secrets_str``.

        ``coordinator_envs`` is popped from ``kwargs`` and defaults to an
        empty dict. The coordinator module name comes from the
        ``GS_COORDINATOR_MODULE_NAME`` environment variable and falls back to
        ``"gscoordinator"``.
        """
        # Capture the arguments while the frame holds nothing but them.
        # NOTE: the captured dict also contains ``self`` and ``kwargs``.
        self._saved_locals = locals()

        self._api_client = api_client
        self._core_api = kube_client.CoreV1Api(api_client)
        self._app_api = kube_client.AppsV1Api(api_client)
        self._rbac_api = kube_client.RbacAuthorizationV1Api(api_client)

        self._namespace = self._saved_locals["k8s_namespace"]

        # Normalise the pull secrets: None -> [], single value -> [value].
        self._image_pull_secrets = self._saved_locals["k8s_image_pull_secrets"]
        if self._image_pull_secrets is None:
            self._image_pull_secrets = []
        if not isinstance(self._image_pull_secrets, list):
            self._image_pull_secrets = [self._image_pull_secrets]
        self._image_pull_secrets_str = ",".join(self._image_pull_secrets)

        # One random id is shared by every name derived below.
        self._instance_id = random_string(6)
        self._role_name = self._role_name_prefix + self._instance_id
        self._role_binding_name = self._role_binding_name_prefix + self._instance_id
        self._coordinator_name = self._coordinator_name_prefix + self._instance_id
        self._coordinator_service_name = (
            self._coordinator_service_name_prefix + self._instance_id
        )
        self._cluster_role_name = ""
        self._cluster_role_binding_name = ""

        # all resources created inside the namespace
        self._resource_object = []

        # environment variables for the coordinator
        self._coordinator_envs = kwargs.pop("coordinator_envs", {})

        self._coordinator_module_name = os.environ.get(
            "GS_COORDINATOR_MODULE_NAME", "gscoordinator"
        )

        # pods watcher
        self._coordinator_pods_watcher = []
        self._logs = []

        self._closed = False
        self._delete_namespace = False
Exemplo n.º 4
0
    def __init__(
        self,
        api_client=None,
        namespace=None,
        service_type=None,
        minikube_vm_driver=None,
        num_workers=None,
        gs_image=None,
        etcd_image=None,
        gie_graph_manager_image=None,
        zookeeper_image=None,
        image_pull_policy=None,
        image_pull_secrets=None,
        vineyard_cpu=None,
        vineyard_mem=None,
        vineyard_shared_mem=None,
        engine_cpu=None,
        engine_mem=None,
        coordinator_cpu=None,
        coordinator_mem=None,
        timeout_seconds=None,
        waiting_for_delete=None,
        **kwargs
    ):
        """Create the Kubernetes API handles and store the launch settings.

        Every named argument is stored on the instance under the same name
        prefixed with an underscore. ``image_pull_secrets`` is additionally
        normalised to a list: ``None`` becomes ``[]`` and any other non-list
        value is wrapped in a one-element list.

        The coordinator name and the coordinator service name each get their
        own ``random_string(6)`` suffix. ``coordinator_envs`` is popped from
        ``kwargs`` and defaults to an empty dict.
        """
        # Kubernetes API handles, all built on the same client.
        self._api_client = api_client
        self._core_api = kube_client.CoreV1Api(api_client)
        self._app_api = kube_client.AppsV1Api(api_client)
        self._rbac_api = kube_client.RbacAuthorizationV1Api(api_client)

        # General deployment settings.
        self._namespace = namespace
        self._service_type = service_type
        self._minikube_vm_driver = minikube_vm_driver
        self._num_workers = num_workers
        self._timeout_seconds = timeout_seconds
        self._waiting_for_delete = waiting_for_delete

        # Container images and how they are pulled.
        self._gs_image = gs_image
        self._etcd_image = etcd_image
        self._gie_graph_manager_image = gie_graph_manager_image
        self._zookeeper_image = zookeeper_image
        self._image_pull_policy = image_pull_policy

        # Normalise the pull secrets: None -> [], single value -> [value].
        self._image_pull_secrets = image_pull_secrets
        if self._image_pull_secrets is None:
            self._image_pull_secrets = []
        if not isinstance(self._image_pull_secrets, list):
            self._image_pull_secrets = [self._image_pull_secrets]

        # Resource settings for vineyard and the engine.
        self._vineyard_cpu = vineyard_cpu
        self._vineyard_mem = vineyard_mem
        self._vineyard_shared_mem = vineyard_shared_mem
        self._engine_cpu = engine_cpu
        self._engine_mem = engine_mem

        # Coordinator naming, resources and environment variables.
        self._coordinator_name = self._coordinator_name_prefix + random_string(6)
        self._coordinator_service_name = (
            self._coordinator_service_name_prefix + random_string(6)
        )
        self._coordinator_cpu = coordinator_cpu
        self._coordinator_mem = coordinator_mem
        self._coordinator_envs = kwargs.pop("coordinator_envs", {})

        self._cluster_role_name = ""
        self._cluster_role_binding_name = ""

        # all resources created inside the namespace
        self._resource_object = []

        # pods watcher
        self._coordinator_pods_watcher = []
        self._logs = []

        self._closed = False
        self._delete_namespace = False
Exemplo n.º 5
0
    def __init__(
        self,
        namespace=None,
        service_type=None,
        gs_image=None,
        etcd_image=None,
        zookeeper_image=None,
        gie_graph_manager_image=None,
        coordinator_name=None,
        coordinator_service_name=None,
        engine_cpu=None,
        engine_mem=None,
        vineyard_cpu=None,
        vineyard_mem=None,
        vineyard_shared_mem=None,
        image_pull_policy=None,
        image_pull_secrets=None,
        num_workers=None,
        log_level=None,
        timeout_seconds=None,
        waiting_for_delete=None,
        delete_namespace=None,
        **kwargs
    ):
        """Load the Kubernetes configuration and initialise the launcher state.

        The in-cluster configuration is tried first. If loading it raises an
        ordinary exception, the local kube config is loaded instead. API
        handles are then created, and each managed resource name gets its own
        ``random_string(6)`` suffix.

        Args:
            image_pull_secrets: Comma-separated secret names, or ``None``.
                Empty entries are dropped, so both ``None`` and ``""``
                produce an empty list.
            log_level: Converted with ``parse_as_glog_level`` and stored as
                ``self._glog_level``.
            **kwargs: Accepted but not used by this constructor.

        All remaining arguments are stored unchanged on the instance under
        the same name prefixed with an underscore.
        """
        try:
            kube_config.load_incluster_config()
        except Exception:
            # Fall back to the local kube config on ordinary failures only;
            # KeyboardInterrupt and SystemExit are no longer swallowed.
            kube_config.load_kube_config()
        self._api_client = kube_client.ApiClient()
        self._core_api = kube_client.CoreV1Api(self._api_client)
        self._app_api = kube_client.AppsV1Api(self._api_client)

        # random for multiple k8s cluster in the same namespace
        self._engine_name = self._engine_name_prefix + random_string(6)
        self._etcd_name = self._etcd_name_prefix + random_string(6)
        self._etcd_service_name = self._etcd_service_name_prefix + random_string(6)

        self._gie_graph_manager_name = (
            self._gie_graph_manager_name_prefix + random_string(6)
        )
        self._gie_graph_manager_service_name = (
            self._gie_graph_manager_service_name_prefix + random_string(6)
        )
        self._vineyard_service_name = (
            self._vineyard_service_name_prefix + random_string(6)
        )

        self._namespace = namespace
        self._service_type = service_type
        self._num_workers = num_workers

        self._coordinator_name = coordinator_name
        self._coordinator_service_name = coordinator_service_name

        self._resource_object = []

        # engine container info
        self._gs_image = gs_image
        self._engine_cpu = engine_cpu
        self._engine_mem = engine_mem

        # vineyard container info
        self._vineyard_cpu = vineyard_cpu
        self._vineyard_mem = vineyard_mem
        self._vineyard_shared_mem = vineyard_shared_mem

        # etcd pod info
        self._etcd_image = etcd_image
        self._etcd_endpoint = None

        # zookeeper pod info
        self._zookeeper_image = zookeeper_image

        # interactive engine graph manager info
        self._gie_graph_manager_image = gie_graph_manager_image

        self._image_pull_policy = image_pull_policy

        # image pull secrets: skip empty entries so that "" (for example the
        # result of joining an empty list) does not become [""]
        if image_pull_secrets is not None:
            self._image_pull_secrets = [
                secret for secret in image_pull_secrets.split(",") if secret
            ]
        else:
            self._image_pull_secrets = []

        self._host0 = None
        self._pod_name_list = None
        self._pod_ip_list = None
        self._pod_host_ip_list = None

        self._analytical_engine_endpoint = None
        self._vineyard_service_endpoint = None

        self._closed = False
        self._glog_level = parse_as_glog_level(log_level)
        self._timeout_seconds = timeout_seconds
        self._waiting_for_delete = waiting_for_delete
        self._delete_namespace = delete_namespace

        self._analytical_engine_process = None

        # 8000 ~ 9000 is exposed
        self._learning_engine_ports_usage = 8000
        self._graphlearn_services = dict()
        self._learning_instance_processes = {}