Example #1
def dockerjson_pv(
        pull_secret,
        name=None, 
        filename='.dockerconfigjson',
        project_to='/kaniko/.docker/config.json'):
    """
    Creates V1Volume volume projection from kubernetes pull secret
    """
    from os.path import basename, dirname
    from kubernetes import client
    
    if not name:
        from uuid import uuid1
        name='vol-' + str(uuid1())[:12]
    
    return k8sc.V1Volume(
        name=name,
        projected=k8sc.V1ProjectedVolumeSource(sources=[
            k8sc.V1VolumeProjection(
                secret=k8sc.V1SecretProjection(
                    name=pull_secret, 
                    items=[k8sc.V1KeyToPath(key=filename, path=basename(project_to))]
                )
            )
        ])
    )
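The projected volume above only takes effect once it is mounted at the directory of project_to. A minimal pairing sketch; the secret name 'my-pull-secret' and volume name 'registrycreds' are assumptions, not part of the original:

# Hypothetical pairing of the volume from dockerjson_pv() with its mount.
from os.path import dirname
from kubernetes import client as k8sc

project_to = '/kaniko/.docker/config.json'
volume = dockerjson_pv('my-pull-secret', name='registrycreds',
                       project_to=project_to)
mount = k8sc.V1VolumeMount(name='registrycreds',
                           mount_path=dirname(project_to))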
Example #2
 def _use_config_map(task):
     config_map = k8s.V1ConfigMapVolumeSource(
         name=name,
         items=[k8s.V1KeyToPath(key=key, path=key)
                for key in key_path_mapper]
     )
     return task \
         .add_volume(k8s.V1Volume(config_map=config_map, name=name)) \
         .add_volume_mount(k8s.V1VolumeMount(mount_path=mount_path, name=name))
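_use_config_map reads name, key_path_mapper and mount_path from an enclosing scope. A sketch of the kind of factory that could supply them, assuming a kfp-style task with add_volume/add_volume_mount; the factory itself is not shown in the original:

def use_config_map(name, key_path_mapper, mount_path):
    # Hypothetical enclosing factory supplying the free variables above.
    from kubernetes import client as k8s

    def _use_config_map(task):
        config_map = k8s.V1ConfigMapVolumeSource(
            name=name,
            items=[k8s.V1KeyToPath(key=key, path=key)
                   for key in key_path_mapper]
        )
        return task \
            .add_volume(k8s.V1Volume(config_map=config_map, name=name)) \
            .add_volume_mount(k8s.V1VolumeMount(mount_path=mount_path,
                                                name=name))

    return _use_config_map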
Example #3
def create_job_object(job_arguments, size, docker_image, docker_image_tag,
                      affinity):

    user = os.environ['USER']

    container = client.V1Container(
        name='kaml-remote',
        args=job_arguments,
        image='{}:{}'.format(docker_image, docker_image_tag),
        image_pull_policy='Always',
        env=[client.V1EnvVar(name='KAML_HOME', value='/app')],
        volume_mounts=[
            client.V1VolumeMount(name='kaml-cfg-volume',
                                 read_only=True,
                                 mount_path='/app/kaml.cfg',
                                 sub_path='kaml.cfg'),
            client.V1VolumeMount(name='gcp-service-account',
                                 read_only=True,
                                 mount_path='/app/service-key.json',
                                 sub_path='service-key.json'),
        ])

    volumes = [
        client.V1Volume(
            name='kaml-cfg-volume',
            config_map=client.V1ConfigMapVolumeSource(name='kaml-cfg')),
        client.V1Volume(
            name='gcp-service-account',
            secret=client.V1SecretVolumeSource(
                secret_name='gcp-service-account',
                items=[
                    client.V1KeyToPath(key='service-key.json',
                                       path='service-key.json')
                ])),
    ]

    job = client.V1Job(
        metadata=client.V1ObjectMeta(
            name='kaml-remote-{}-{}'.format(user, uuid.uuid1())),
        spec=client.V1JobSpec(template=client.V1PodTemplateSpec(
            metadata=client.V1ObjectMeta(
                name='kaml-remote-{}-{}'.format(user, uuid.uuid1()),
                labels={'type': size}),
            spec=client.V1PodSpec(containers=[container],
                                  affinity=affinity,
                                  volumes=volumes,
                                  restart_policy='Never'))))

    return job
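A hedged submission sketch: the V1Job returned above can be handed straight to the batch API. The argument values and the 'default' namespace are placeholders, not from the original:

# Assumes kube config is already loaded, e.g. via config.load_kube_config().
job = create_job_object(['--epochs', '10'], 'small', 'gcr.io/example/kaml',
                        'latest', affinity=None)
client.BatchV1Api().create_namespaced_job(namespace='default', body=job)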
Example #4
    def _use_pull_secret(task):
        from os.path import basename, dirname
        from kubernetes import client as k8sc

        return (task.add_volume(
            k8sc.V1Volume(
                name='registrycreds',
                projected=k8sc.V1ProjectedVolumeSource(sources=[
                    k8sc.V1VolumeProjection(secret=k8sc.V1SecretProjection(
                        name=secret_name,
                        items=[
                            k8sc.V1KeyToPath(key=filename,
                                             path=basename(project_to))
                        ]))
                ]))).add_volume_mount(
                    k8sc.V1VolumeMount(name='registrycreds',
                                       mount_path=dirname(project_to))))
Example #5
def add_acr_config(kube_manager, pod_spec, namespace):
    secret_name = constants.AZURE_ACR_CREDS_SECRET_NAME
    if not kube_manager.secret_exists(secret_name, namespace):
        raise Exception("Secret '{}' not found in namespace '{}'".format(
            secret_name, namespace))

    volume_mount = client.V1VolumeMount(name='acr-config',
                                        mount_path='/kaniko/.docker/',
                                        read_only=True)

    if pod_spec.containers[0].volume_mounts:
        pod_spec.containers[0].volume_mounts.append(volume_mount)
    else:
        pod_spec.containers[0].volume_mounts = [volume_mount]

    items = [client.V1KeyToPath(key='.dockerconfigjson', path='config.json')]
    volume = client.V1Volume(name='acr-config',
                             secret=client.V1SecretVolumeSource(
                                 secret_name=secret_name, items=items))
    if pod_spec.volumes:
        pod_spec.volumes.append(volume)
    else:
        pod_spec.volumes = [volume]
Example #6
def get_task_manager_boilerplate(
        job: KubernetesFlinkJob) -> client.V1Deployment:
    from ai_flow.application_master.master import GLOBAL_MASTER_CONFIG
    dep_resource_metadata = client.V1ObjectMeta(name='flink-task-manager-' +
                                                str(job.uuid))

    mount_path = '/opt/ai-flow/project'
    volume_mount = client.V1VolumeMount(name='download-volume',
                                        mount_path=mount_path)
    flink_config_volume_mount = client.V1VolumeMount(
        name="flink-config-volume", mount_path="/opt/flink/conf")
    init_args_default = [
        str(job.job_config.properties),
        str(job.job_context.workflow_execution_id),
        job.job_config.project_path, mount_path
    ]

    init_container = client.V1Container(
        name='init-container',
        image=GLOBAL_MASTER_CONFIG['ai_flow_base_init_image'],
        image_pull_policy='Always',
        command=["python", "/app/download.py"],
        args=init_args_default,
        volume_mounts=[volume_mount, flink_config_volume_mount])
    volume = client.V1Volume(name='download-volume')

    task_manager_args = [
        "task-manager", "-Djobmanager.rpc.address=" +
        'flink-job-cluster-{}-svc'.format(job.uuid)
    ]

    try:
        flink_conf = job.job_config.flink_conf
        for key, value in flink_conf.items():
            task_manager_args.extend(["-D{}={}".format(key, value)])
    except KeyError:
        pass

    workflow_id_env = client.V1EnvVar(
        name='WORKFLOW_ID', value=str(job.job_context.workflow_execution_id))
    execution_config_env = client.V1EnvVar(name='CONFIG_FILE_NAME',
                                           value=job.config_file)

    # flink_conf.yaml config map volume
    config_name = "flink-config-{}".format(job.uuid)
    key_to_path_list = [
        client.V1KeyToPath(key="flink-conf.yaml", path="flink-conf.yaml"),
        client.V1KeyToPath(key="log4j.properties", path="log4j.properties"),
        client.V1KeyToPath(key="log4j-cli.properties",
                           path="log4j-cli.properties"),
    ]
    flink_config_volume = client.V1Volume(
        name="flink-config-volume",
        config_map=client.V1ConfigMapVolumeSource(name=config_name,
                                                  items=key_to_path_list))

    task_manager_container_image = None
    if 'flink_ai_flow_base_image' in GLOBAL_MASTER_CONFIG:
        task_manager_container_image = GLOBAL_MASTER_CONFIG[
            'flink_ai_flow_base_image']
    try:
        if job.job_config.image is not None:
            task_manager_container_image = job.job_config.image
    except KeyError:
        pass
    if task_manager_container_image is None:
        raise Exception("flink_ai_flow_base_image not set")

    tm_container = client.V1Container(
        name='flink-task-manager-' + str(job.uuid),
        image=task_manager_container_image,
        command=['/docker-entrypoint.sh'],
        args=task_manager_args,
        env=[workflow_id_env, execution_config_env],
        volume_mounts=[volume_mount])

    try:
        tm_resource = job.job_config.resources['taskmanager']
        tm_container.resources = client.V1ResourceRequirements(
            requests=tm_resource)
    except KeyError:
        pass

    containers = [tm_container]
    labels = {'app': 'flink', 'component': 'task-manager-' + str(job.uuid)}
    pod_template = client.V1PodTemplateSpec(
        metadata=client.V1ObjectMeta(labels=labels),
        spec=client.V1PodSpec(containers=containers,
                              init_containers=[init_container],
                              volumes=[volume, flink_config_volume]))

    deployment_spec = client.V1DeploymentSpec(
        replicas=job.job_config.parallelism,
        template=pod_template,
        selector={'matchLabels': labels})
    dep_resource = client.V1Deployment(api_version='extensions/v1beta1',
                                       kind='Deployment',
                                       spec=deployment_spec,
                                       metadata=dep_resource_metadata)
    return dep_resource
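A creation sketch for the deployment built above. Note the function stamps api_version='extensions/v1beta1', which current clusters no longer serve; on an apps/v1-era client the call would look like this (flink_job and the namespace are placeholders):

# Hypothetical creation call; flink_job stands in for a real KubernetesFlinkJob.
deployment = get_task_manager_boilerplate(flink_job)
client.AppsV1Api().create_namespaced_deployment(namespace='default',
                                                body=deployment)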
Example #7
def get_flink_session_cluster_boilerplate(
        job: KubernetesFlinkJob) -> client.V1Job:
    from ai_flow.application_master.master import GLOBAL_MASTER_CONFIG
    job_master_args_default = [
        "session-cluster", "--job-classname", job.job_config.main_class,
        "-Djobmanager.rpc.address=flink-job-cluster-{}-svc".format(job.uuid),
        "-Dparallelism.default=1", "-Dblob.server.port=6124",
        "-Dqueryable-state.server.ports=6125"
    ]
    rpc_container_port = client.V1ContainerPort(name='rpc',
                                                container_port=6123)
    blob_container_port = client.V1ContainerPort(name='blob',
                                                 container_port=6124)
    query_container_port = client.V1ContainerPort(name='query',
                                                  container_port=6125)
    ui_container_port = client.V1ContainerPort(name='ui', container_port=8081)
    mount_path = '/opt/ai-flow/project'
    volume_mount = client.V1VolumeMount(name='download-volume',
                                        mount_path=mount_path)
    flink_config_volume_mount = client.V1VolumeMount(
        name="flink-config-volume", mount_path="/opt/flink/conf")
    workflow_id_env = client.V1EnvVar(
        name='WORKFLOW_ID', value=str(job.job_context.workflow_execution_id))
    execution_config_env = client.V1EnvVar(name='CONFIG_FILE_NAME',
                                           value=job.config_file)
    if job.job_config.language_type == LanguageType.PYTHON:
        language_type_env = client.V1EnvVar(name='LANGUAGE_TYPE',
                                            value='python')
    else:
        language_type_env = client.V1EnvVar(name='LANGUAGE_TYPE', value='java')

    entry_module_path_env = client.V1EnvVar(
        name='ENTRY_MODULE_PATH',
        value=job.job_config.properties['entry_module_path'])
    flink_job_master_rpc_address_env = client.V1EnvVar(
        name='FLINK_JOB_MASTER_RPC_ADDRESS',
        value="flink-job-cluster-{}-svc".format(job.uuid))

    job_master_container_image = None
    if 'flink_ai_flow_base_image' in GLOBAL_MASTER_CONFIG:
        job_master_container_image = GLOBAL_MASTER_CONFIG[
            'flink_ai_flow_base_image']
    if job.job_config.image is not None:
        job_master_container_image = job.job_config.image

    if job_master_container_image is None:
        raise Exception("flink_ai_flow_base_image not set")

    job_master_container = client.V1Container(
        name='flink-job-master-{}'.format(job.uuid),
        image=job_master_container_image,
        image_pull_policy='Always',
        ports=[
            rpc_container_port, blob_container_port, query_container_port,
            ui_container_port
        ],
        command=['/docker-entrypoint.sh'],
        args=job_master_args_default,
        volume_mounts=[volume_mount, flink_config_volume_mount],
        env=[
            workflow_id_env, execution_config_env,
            flink_job_master_rpc_address_env, entry_module_path_env,
            language_type_env
        ])

    try:
        jm_resources = job.job_config.resources['jobmanager']
        job_master_container.resources = client.V1ResourceRequirements(
            requests=jm_resources)
    except KeyError:
        pass

    init_args_default = [
        str(job.job_config.properties),
        str(job.job_context.workflow_execution_id),
        job.job_config.project_path, mount_path
    ]
    init_container = client.V1Container(
        name='init-container',
        image=GLOBAL_MASTER_CONFIG['ai_flow_base_init_image'],
        image_pull_policy='Always',
        command=["python", "/app/download.py"],
        args=init_args_default,
        volume_mounts=[volume_mount])
    volume = client.V1Volume(name='download-volume')

    # flink_conf.yaml config map volume
    config_name = "flink-config-{}".format(job.uuid)
    key_to_path_list = [
        client.V1KeyToPath(key="flink-conf.yaml", path="flink-conf.yaml"),
        client.V1KeyToPath(key="log4j.properties", path="log4j.properties"),
        client.V1KeyToPath(key="log4j-cli.properties",
                           path="log4j-cli.properties"),
    ]
    flink_config_volume = client.V1Volume(
        name="flink-config-volume",
        config_map=client.V1ConfigMapVolumeSource(name=config_name,
                                                  items=key_to_path_list))
    pod_spec = client.V1PodSpec(restart_policy='Never',
                                containers=[job_master_container],
                                init_containers=[init_container],
                                volumes=[volume, flink_config_volume])
    labels = {'app': 'flink', 'component': 'job-cluster-' + str(job.uuid)}
    object_meta = client.V1ObjectMeta(
        labels=labels,
        annotations={
            ANNOTATION_WATCHED: 'True',
            ANNOTATION_JOB_ID: str(job.instance_id),
            ANNOTATION_WORKFLOW_ID: str(job.job_context.workflow_execution_id),
            ANNOTATION_JOB_UUID: str(job.uuid)
        })
    template_spec = client.V1PodTemplateSpec(metadata=object_meta,
                                             spec=pod_spec)
    job_spec = client.V1JobSpec(template=template_spec, backoff_limit=0)
    object_meta = client.V1ObjectMeta(labels=labels,
                                      name=generate_job_name(job))
    job = client.V1Job(metadata=object_meta,
                       spec=job_spec,
                       api_version='batch/v1',
                       kind='Job')
    return job
Example #8
 def create_config_map(self):
     items = [client.V1KeyToPath(key=self.source_key, path=self.source_path)]
     return client.V1ConfigMapVolumeSource(name=self.config_map_name,
                                           items=items)
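The method returns only a volume source. A sketch of wrapping it into a pod-level volume and mount; all names here are assumptions:

# A V1ConfigMapVolumeSource must still be placed inside a V1Volume
# before a pod spec can reference it.
source = client.V1ConfigMapVolumeSource(
    name='my-config-map',
    items=[client.V1KeyToPath(key='app.yaml', path='conf/app.yaml')])
volume = client.V1Volume(name='config-volume', config_map=source)
mount = client.V1VolumeMount(name='config-volume', mount_path='/etc/app')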
Example #9
def deploy_fake_cega(deploy_lega):
    """Deploy the Fake CEGA."""
    _here = Path(__file__).parent
    trace_file = Path(_here / 'config/trace.ini')
    assert trace_file.exists(),  "No trace file!"
    trace_config = configparser.ConfigParser()
    trace_config.read(trace_file)

    with open(_here / 'extras/server.py') as users_init:
        init_users = users_init.read()

    with open(_here / 'extras/users.html') as user_list:
        users = user_list.read()

    with open(_here / 'extras/cega-mq.sh') as ceg_mq_init:
        cega_init_mq = ceg_mq_init.read()

    with open(_here / 'config/cega.config') as cega_config:
        cega_config_mq = cega_config.read()

    with open(_here / 'config/cega.json') as cega_defs:
        cega_defs_mq = cega_defs.read()

    user_pub = trace_config['secrets']['cega_user_public_key']
    ports_mq_management = [client.V1ServicePort(name="http", protocol="TCP", port=15672, target_port=15672)]
    ports_mq = [client.V1ServicePort(name="amqp", protocol="TCP", port=5672, target_port=5672),
                client.V1ServicePort(name="epmd", protocol="TCP", port=4369, target_port=4369),
                client.V1ServicePort(name="rabbitmq-dist", protocol="TCP", port=25672, target_port=25672)]

    deploy_lega.config_map('users-config', {'server.py': init_users, 'users.html': users,
                           'ega-box-999.yml': f'---\npubkey: {user_pub}'})
    env_users_inst = client.V1EnvVar(name="LEGA_INSTANCES", value="lega")
    env_users_creds = client.V1EnvVar(name="CEGA_REST_lega_PASSWORD",
                                      value_from=client.V1EnvVarSource(secret_key_ref=client.V1SecretKeySelector(name='cega-creds',
                                                                                                                 key="credentials")))
    mount_users = client.V1VolumeMount(name="users-config", mount_path='/cega')
    users_map = client.V1ConfigMapProjection(name="users-config",
                                             items=[client.V1KeyToPath(key="server.py", path="server.py"),
                                                    client.V1KeyToPath(key="users.html", path="users.html"),
                                                    client.V1KeyToPath(key="ega-box-999.yml", path="users/ega-box-999.yml"),
                                                    client.V1KeyToPath(key="ega-box-999.yml", path="users/lega/ega-box-999.yml")])
    users_vol = client.V1VolumeProjection(config_map=users_map)
    volume_users = client.V1Volume(name="users-config",
                                   projected=client.V1ProjectedVolumeSource(sources=[users_vol]))

    deploy_lega.config_map('cega-mq-entrypoint', {'cega-mq.sh': cega_init_mq})
    deploy_lega.config_map('cega-mq-config', {'defs.json': cega_defs_mq, 'rabbitmq.config': cega_config_mq})
    deploy_lega.persistent_volume("cega-rabbitmq", "1Gi")
    deploy_lega.persistent_volume_claim("cega-mq-storage", "cega-rabbitmq", "1Gi")
    mount_cega_temp = client.V1VolumeMount(name="cega-mq-temp", mount_path='/temp')
    mount_cega_rabbitmq = client.V1VolumeMount(name="cega-rabbitmq", mount_path='/etc/rabbitmq')
    volume_cega_temp = client.V1Volume(name="cega-mq-temp", config_map=client.V1ConfigMapVolumeSource(name="cega-mq-config"))
    volume_cega_rabbitmq = client.V1Volume(name="cega-rabbitmq",
                                           persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource(claim_name="cega-mq-storage"))
    mount_mq_cega = client.V1VolumeMount(name="cega-mq-entrypoint", mount_path='/script')
    volume_mq_cega = client.V1Volume(name="cega-mq-entrypoint", config_map=client.V1ConfigMapVolumeSource(name="cega-mq-entrypoint",
                                                                                                          default_mode=0o744))

    deploy_lega.stateful_set('cega-mq', 'rabbitmq:3.6.14-management', ["/script/cega-mq.sh"], None,
                             [mount_cega_temp, mount_mq_cega, mount_cega_rabbitmq],
                             [volume_cega_temp, volume_mq_cega, volume_cega_rabbitmq],
                             ports=[15672, 5672, 4369, 25672])

    deploy_lega.deployment('cega-users', 'nbisweden/ega-base:latest', ["python3.6", "/cega/server.py"],
                           [env_users_inst, env_users_creds],
                           [mount_users], [volume_users],
                           ports=[8001])
    ports_users = [client.V1ServicePort(protocol="TCP", port=8001, target_port=8001)]
    deploy_lega.service('cega-mq', ports_mq, type="NodePort")
    deploy_lega.service('cega-mq-management', ports_mq_management, pod_name="cega-mq", type="NodePort")
    deploy_lega.service('cega-users', ports_users, type="NodePort")
Example #10
def kubernetes_deployment(_localega, config, ns, fake_cega):
    """Wrap all the kubernetes settings."""
    _here = Path(__file__).parent
    trace_file = Path(_here / 'config/trace.ini')
    assert trace_file.exists(),  "No trace file!"
    trace_config = configparser.ConfigParser()
    trace_config.read(trace_file)

    deploy_lega = LocalEGADeploy(_localega, ns)

    # Setting ENV variables and Volumes
    env_cega_api = client.V1EnvVar(name="CEGA_ENDPOINT", value=f"{_localega['cega']['endpoint']}")
    env_inbox_mq = client.V1EnvVar(name="BROKER_HOST", value=f"{_localega['services']['broker']}.{ns}")
    env_inbox_port = client.V1EnvVar(name="INBOX_PORT", value="2222")
    env_db_data = client.V1EnvVar(name="PGDATA", value="/var/lib/postgresql/data/pgdata")
    env_cega_mq = client.V1EnvVar(name="CEGA_CONNECTION",
                                  value_from=client.V1EnvVarSource(secret_key_ref=client.V1SecretKeySelector(name='cega-connection',
                                                                                                             key="address")))
    env_cega_creds = client.V1EnvVar(name="CEGA_ENDPOINT_CREDS",
                                     value_from=client.V1EnvVarSource(secret_key_ref=client.V1SecretKeySelector(name='cega-creds',
                                                                                                                key="credentials")))
    env_acc_minio = client.V1EnvVar(name="MINIO_ACCESS_KEY",
                                    value_from=client.V1EnvVarSource(secret_key_ref=client.V1SecretKeySelector(name='s3-keys',
                                                                                                               key="access")))
    env_sec_minio = client.V1EnvVar(name="MINIO_SECRET_KEY",
                                    value_from=client.V1EnvVarSource(secret_key_ref=client.V1SecretKeySelector(name='s3-keys',
                                                                                                               key="secret")))
    env_acc_s3 = client.V1EnvVar(name="S3_ACCESS_KEY",
                                 value_from=client.V1EnvVarSource(secret_key_ref=client.V1SecretKeySelector(name='s3-keys',
                                                                                                            key="access")))
    env_sec_s3 = client.V1EnvVar(name="S3_SECRET_KEY",
                                 value_from=client.V1EnvVarSource(secret_key_ref=client.V1SecretKeySelector(name='s3-keys',
                                                                                                            key="secret")))
    env_db_pass = client.V1EnvVar(name="POSTGRES_PASSWORD",
                                  value_from=client.V1EnvVarSource(secret_key_ref=client.V1SecretKeySelector(name='lega-db-secret',
                                                                                                             key="postgres_password")))
    env_db_user = client.V1EnvVar(name="POSTGRES_USER",
                                  value_from=client.V1EnvVarSource(config_map_key_ref=client.V1ConfigMapKeySelector(name='lega-db-config',
                                                                                                                    key="user")))
    env_db_name = client.V1EnvVar(name="POSTGRES_DB",
                                  value_from=client.V1EnvVarSource(config_map_key_ref=client.V1ConfigMapKeySelector(name='lega-db-config',
                                                                                                                    key="dbname")))
    env_lega_pass = client.V1EnvVar(name="LEGA_PASSWORD",
                                    value_from=client.V1EnvVarSource(secret_key_ref=client.V1SecretKeySelector(name='lega-password',
                                                                                                               key="password")))
    env_keys_pass = client.V1EnvVar(name="KEYS_PASSWORD",
                                    value_from=client.V1EnvVarSource(secret_key_ref=client.V1SecretKeySelector(name='keys-password',
                                                                                                               key="password")))
    mount_config = client.V1VolumeMount(name="config", mount_path='/etc/ega')
    mount_inbox = client.V1VolumeMount(name="inbox", mount_path='/ega/inbox')
    mount_mq_temp = client.V1VolumeMount(name="mq-temp", mount_path='/temp')
    mount_mq_rabbitmq = client.V1VolumeMount(name="rabbitmq", mount_path='/etc/rabbitmq')
    mount_mq_script = client.V1VolumeMount(name="mq-entrypoint", mount_path='/script')
    mount_db_data = client.V1VolumeMount(name="data", mount_path='/var/lib/postgresql/data', read_only=False)
    mount_db_init = client.V1VolumeMount(name="initsql", mount_path='/docker-entrypoint-initdb.d')
    mount_minio = client.V1VolumeMount(name="data", mount_path='/data')

    pmap_ini_conf = client.V1VolumeProjection(config_map=client.V1ConfigMapProjection(name="lega-config",
                                                                                      items=[client.V1KeyToPath(key="conf.ini", path="conf.ini", mode=0o744)]))
    pmap_ini_keys = client.V1VolumeProjection(config_map=client.V1ConfigMapProjection(name="lega-keyserver-config",
                                                                                      items=[client.V1KeyToPath(key="keys.ini",
                                                                                                                path="keys.ini", mode=0o744)]))
    sec_keys = client.V1VolumeProjection(secret=client.V1SecretProjection(name="keyserver-secret",
                                                                          items=[client.V1KeyToPath(key="key1.sec", path="pgp/key.1"), client.V1KeyToPath(key="ssl.cert", path="ssl.cert"), client.V1KeyToPath(key="ssl.key", path="ssl.key")]))
    deploy_lega.create_namespace()
    deploy_lega.config_secret('cega-creds', {'credentials': trace_config['secrets']['cega_creds']})
    # Create Secrets
    deploy_lega.config_secret('cega-connection', {'address': trace_config['secrets']['cega_address']})
    deploy_lega.config_secret('lega-db-secret', {'postgres_password': trace_config['secrets']['postgres_password']})
    deploy_lega.config_secret('s3-keys', {'access': trace_config['secrets']['s3_access'],
                                          'secret': trace_config['secrets']['s3_secret']})
    deploy_lega.config_secret('lega-password', {'password': trace_config['secrets']['lega_password']})
    deploy_lega.config_secret('keys-password', {'password': trace_config['secrets']['keys_password']})

    with open(_here / 'config/key.1.sec') as key_file:
        key1_data = key_file.read()

    with open(_here / 'config/ssl.cert') as cert:
        ssl_cert = cert.read()

    with open(_here / 'config/ssl.key') as key:
        ssl_key = key.read()

    deploy_lega.config_secret('keyserver-secret', {'key1.sec': key1_data,
                                                   'ssl.cert': ssl_cert, 'ssl.key': ssl_key})

    # Read conf from files
    with open(_here / 'extras/db.sql') as sql_init:
        init_sql = sql_init.read()

    with open(_here / 'extras/mq.sh') as mq_init:
        init_mq = mq_init.read()

    with open(_here / 'config/conf.ini') as conf_file:
        data_conf = conf_file.read()

    with open(_here / 'config/keys.ini') as keys_file:
        data_keys = keys_file.read()

    with open(_here / 'config/rabbitmq.config') as config:
        config_mq = config.read()

    with open(_here / 'config/defs.json') as defs:
        defs_mq = defs.read()

    # secret = deploy_lega.read_secret('keys-password')
    # enc_keys = conf.aes_encrypt(b64decode(secret.to_dict()['data']['password'].encode('utf-8')), data_keys.encode('utf-8'), md5)

    # with open(_here / 'config/keys.ini.enc', 'w') as enc_file:
    #     enc_file.write(b64encode(enc_keys).decode('utf-8'))

    # Upload Configuration Maps
    deploy_lega.config_map('initsql', {'db.sql': init_sql})
    deploy_lega.config_map('mq-config', {'defs.json': defs_mq, 'rabbitmq.config': config_mq})
    deploy_lega.config_map('mq-entrypoint', {'mq.sh': init_mq})
    deploy_lega.config_map('lega-config', {'conf.ini': data_conf})
    deploy_lega.config_map('lega-keyserver-config', {'keys.ini': data_keys})
    deploy_lega.config_map('lega-db-config', {'user': '******', 'dbname': 'lega'})

    # Volumes
    deploy_lega.persistent_volume("postgres", "0.5Gi", accessModes=["ReadWriteMany"])
    deploy_lega.persistent_volume("rabbitmq", "0.5Gi")
    deploy_lega.persistent_volume("inbox", "0.5Gi", accessModes=["ReadWriteMany"])
    deploy_lega.persistent_volume_claim("db-storage", "postgres", "0.5Gi", accessModes=["ReadWriteMany"])
    deploy_lega.persistent_volume_claim("mq-storage", "rabbitmq", "0.5Gi")
    deploy_lega.persistent_volume_claim("inbox", "inbox", "0.5Gi", accessModes=["ReadWriteMany"])
    volume_db = client.V1Volume(name="data", persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource(claim_name="db-storage"))
    volume_rabbitmq = client.V1Volume(name="rabbitmq",
                                      persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource(claim_name="mq-storage"))
    volume_db_init = client.V1Volume(name="initsql", config_map=client.V1ConfigMapVolumeSource(name="initsql"))
    volume_mq_temp = client.V1Volume(name="mq-temp", config_map=client.V1ConfigMapVolumeSource(name="mq-config"))
    volume_mq_script = client.V1Volume(name="mq-entrypoint", config_map=client.V1ConfigMapVolumeSource(name="mq-entrypoint",
                                                                                                       default_mode=0o744))
    volume_config = client.V1Volume(name="config", config_map=client.V1ConfigMapVolumeSource(name="lega-config"))
    # volume_ingest = client.V1Volume(name="ingest-conf", config_map=client.V1ConfigMapVolumeSource(name="lega-config"))
    volume_inbox = client.V1Volume(name="inbox", persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource(claim_name="inbox"))
    volume_keys = client.V1Volume(name="config",
                                  projected=client.V1ProjectedVolumeSource(sources=[pmap_ini_conf, pmap_ini_keys, sec_keys]))

    pvc_minio = client.V1PersistentVolumeClaim(metadata=client.V1ObjectMeta(name="data"),
                                               spec=client.V1PersistentVolumeClaimSpec(access_modes=["ReadWriteOnce"],
                                                                                       resources=client.V1ResourceRequirements(requests={"storage": "10Gi"})))
    # Deploy LocalEGA Pods
    deploy_lega.deployment('mapper', 'nbisweden/ega-base:latest',
                           ["ega-id-mapper"], [], [mount_config], [volume_config], patch=True)
    deploy_lega.deployment('keys', 'nbisweden/ega-base:latest',
                           ["ega-keyserver", "--keys", "/etc/ega/keys.ini"],
                           [env_lega_pass, env_keys_pass], [mount_config], [volume_keys], ports=[8443], patch=True)
    deploy_lega.deployment('db', 'postgres:9.6', None, [env_db_pass, env_db_user, env_db_name, env_db_data],
                           [mount_db_data, mount_db_init], [volume_db, volume_db_init], ports=[5432])
    deploy_lega.deployment('ingest', 'nbisweden/ega-base:latest', ["ega-ingest"],
                           [env_lega_pass, env_acc_s3, env_sec_s3, env_db_pass],
                           [mount_config, mount_inbox], [volume_config, volume_inbox])

    deploy_lega.stateful_set('minio', 'minio/minio:latest', None, [env_acc_minio, env_sec_minio],
                             [mount_minio], None, args=["server", "/data"], vol_claims=[pvc_minio], ports=[9000])

    deploy_lega.stateful_set('verify', 'nbisweden/ega-base:latest', ["ega-verify"],
                             [env_acc_s3, env_sec_s3, env_lega_pass, env_db_pass], [mount_config], [volume_config])

    deploy_lega.stateful_set('mq', 'rabbitmq:3.6.14-management', ["/script/mq.sh"],
                             [env_cega_mq], [mount_mq_temp, mount_mq_script, mount_mq_rabbitmq],
                             [volume_mq_temp, volume_mq_script, volume_rabbitmq],
                             ports=[15672, 5672, 4369, 25672])
    deploy_lega.stateful_set('inbox', 'nbisweden/ega-mina-inbox:latest', None,
                             [env_inbox_mq, env_cega_api, env_cega_creds, env_inbox_port],
                             [mount_inbox], [volume_inbox], ports=[2222])

    # Ports
    ports_db = [client.V1ServicePort(protocol="TCP", port=5432, target_port=5432)]
    ports_inbox = [client.V1ServicePort(protocol="TCP", port=2222, target_port=2222)]
    ports_s3 = [client.V1ServicePort(name="web", protocol="TCP", port=9000)]
    ports_keys = [client.V1ServicePort(protocol="TCP", port=8443, target_port=8443)]
    ports_mq_management = [client.V1ServicePort(name="http", protocol="TCP", port=15672, target_port=15672)]
    ports_mq = [client.V1ServicePort(name="amqp", protocol="TCP", port=5672, target_port=5672),
                client.V1ServicePort(name="epmd", protocol="TCP", port=4369, target_port=4369),
                client.V1ServicePort(name="rabbitmq-dist", protocol="TCP", port=25672, target_port=25672)]

    # Deploy Services
    deploy_lega.service('db', ports_db)
    deploy_lega.service('mq-management', ports_mq_management, pod_name="mq", type="NodePort")
    deploy_lega.service('mq', ports_mq)
    deploy_lega.service('keys', ports_keys)
    deploy_lega.service('inbox', ports_inbox, type="NodePort")
    deploy_lega.service('minio', ports_s3)  # Headless
    deploy_lega.service('minio-service', ports_s3, pod_name="minio", type="LoadBalancer")

    metric_cpu = client.V2beta1MetricSpec(type="Resource",
                                          resource=client.V2beta1ResourceMetricSource(name="cpu", target_average_utilization=50))
    deploy_lega.horizontal_scale("ingest", "ingest", "Deployment", 5, [metric_cpu])

    if fake_cega:
        deploy_fake_cega(deploy_lega)
Example #11
def get_statefulset_object(cluster_object):
    name = cluster_object['metadata']['name']
    namespace = cluster_object['metadata']['namespace']

    try:
        replicas = cluster_object['spec']['mongodb']['replicas']
    except KeyError:
        replicas = 3

    try:
        mongodb_limit_cpu = \
            cluster_object['spec']['mongodb']['mongodb_limit_cpu']
    except KeyError:
        mongodb_limit_cpu = '100m'

    try:
        mongodb_limit_memory = \
            cluster_object['spec']['mongodb']['mongodb_limit_memory']
    except KeyError:
        mongodb_limit_memory = '64Mi'

    statefulset = client.V1beta1StatefulSet()

    # Metadata
    statefulset.metadata = client.V1ObjectMeta(
        name=name, namespace=namespace, labels=get_default_labels(name=name))

    # Spec
    statefulset.spec = client.V1beta1StatefulSetSpec(replicas=replicas,
                                                     service_name=name)

    statefulset.spec.template = client.V1PodTemplateSpec()
    statefulset.spec.template.metadata = client.V1ObjectMeta(
        labels=get_default_labels(name=name))

    statefulset.spec.template.spec = client.V1PodSpec()
    statefulset.spec.template.spec.affinity = client.V1Affinity(
        pod_anti_affinity=client.V1PodAntiAffinity(
            required_during_scheduling_ignored_during_execution=[
                client.V1PodAffinityTerm(
                    topology_key='kubernetes.io/hostname',
                    label_selector=client.V1LabelSelector(match_expressions=[
                        client.V1LabelSelectorRequirement(
                            key='cluster', operator='In', values=[name])
                    ]))
            ]))
    # MongoDB container
    mongodb_port = client.V1ContainerPort(name='mongodb',
                                          container_port=27017,
                                          protocol='TCP')
    mongodb_tls_volumemount = client.V1VolumeMount(
        name='mongo-tls', read_only=True, mount_path='/etc/ssl/mongod')
    mongodb_data_volumemount = client.V1VolumeMount(name='mongo-data',
                                                    read_only=False,
                                                    mount_path='/data/db')
    mongodb_resources = client.V1ResourceRequirements(
        limits={'cpu': mongodb_limit_cpu, 'memory': mongodb_limit_memory},
        requests={'cpu': mongodb_limit_cpu, 'memory': mongodb_limit_memory})
    mongodb_container = client.V1Container(
        name='mongod',
        command=[
            'mongod', '--auth', '--replSet', name, '--sslMode', 'requireSSL',
            '--clusterAuthMode', 'x509', '--sslPEMKeyFile',
            '/etc/ssl/mongod/mongod.pem', '--sslCAFile',
            '/etc/ssl/mongod/ca.pem'
        ],
        image='mongo:3.4.1',
        ports=[mongodb_port],
        volume_mounts=[mongodb_tls_volumemount, mongodb_data_volumemount],
        resources=mongodb_resources)

    # Metrics container
    metrics_port = client.V1ContainerPort(name='metrics',
                                          container_port=9001,
                                          protocol='TCP')
    metrics_resources = client.V1ResourceRequirements(
        limits={'cpu': '50m', 'memory': '16Mi'},
        requests={'cpu': '50m', 'memory': '16Mi'})
    metrics_secret_name = '{}-monitoring-credentials'.format(name)
    metrics_username_env_var = client.V1EnvVar(
        name='MONGODB_MONITORING_USERNAME',
        value_from=client.V1EnvVarSource(
            secret_key_ref=client.V1SecretKeySelector(name=metrics_secret_name,
                                                      key='username')))
    metrics_password_env_var = client.V1EnvVar(
        name='MONGODB_MONITORING_PASSWORD',
        value_from=client.V1EnvVarSource(
            secret_key_ref=client.V1SecretKeySelector(name=metrics_secret_name,
                                                      key='password')))
    metrics_container = client.V1Container(
        name='prometheus-exporter',
        image='quay.io/kubestack/prometheus-mongodb-exporter:latest',
        command=[
            '/bin/sh', '-c',
            '/bin/mongodb_exporter --mongodb.uri mongodb://${MONGODB_MONITORING_USERNAME}:${MONGODB_MONITORING_PASSWORD}@127.0.0.1:27017/admin --mongodb.tls-cert /etc/ssl/mongod/mongod.pem --mongodb.tls-ca /etc/ssl/mongod/ca.pem'
        ],  # flake8: noqa
        ports=[metrics_port],
        resources=metrics_resources,
        volume_mounts=[mongodb_tls_volumemount],
        env=[metrics_username_env_var, metrics_password_env_var])

    statefulset.spec.template.spec.containers = [
        mongodb_container, metrics_container
    ]

    ca_volume = client.V1Volume(name='mongo-ca',
                                secret=client.V1SecretVolumeSource(
                                    secret_name='{}-ca'.format(name),
                                    items=[
                                        client.V1KeyToPath(key='ca.pem',
                                                           path='ca.pem'),
                                        client.V1KeyToPath(key='ca-key.pem',
                                                           path='ca-key.pem')
                                    ]))
    tls_volume = client.V1Volume(name='mongo-tls',
                                 empty_dir=client.V1EmptyDirVolumeSource())
    data_volume = client.V1Volume(name='mongo-data',
                                  empty_dir=client.V1EmptyDirVolumeSource())
    statefulset.spec.template.spec.volumes = [
        ca_volume, tls_volume, data_volume
    ]

    # Init container
    # For now use annotation format for init_container to support K8s >= 1.5
    statefulset.spec.template.metadata.annotations = {
        'pod.beta.kubernetes.io/init-containers':
        '[{"name": "cert-init","image": "quay.io/kubestack/mongodb-init:latest","volumeMounts": [{"readOnly": true,"mountPath": "/etc/ssl/mongod-ca","name": "mongo-ca"}, {"mountPath": "/etc/ssl/mongod","name": "mongo-tls"}],"env": [{"name": "METADATA_NAME","valueFrom": {"fieldRef": {"apiVersion": "v1","fieldPath": "metadata.name"}}}, {"name": "NAMESPACE","valueFrom": {"fieldRef": {"apiVersion": "v1","fieldPath": "metadata.namespace"}}}],"command": ["ansible-playbook","member-cert.yml"],"imagePullPolicy": "Always"}]'
    }  # flake8: noqa

    # tls_init_ca_volumemount = client.V1VolumeMount(
    #     name='mongo-ca',
    #     read_only=True,
    #     mount_path='/etc/ssl/mongod-ca')
    # tls_init_container = client.V1Container(
    #     name="cert-init",
    #     image="quay.io/kubestack/mongodb-init:latest",
    #     volume_mounts=[tls_init_ca_volumemount, mongodb_tls_volumemount],
    #     env=[
    #         client.V1EnvVar(
    #             name='METADATA_NAME',
    #             value_from=client.V1EnvVarSource(
    #                 field_ref=client.V1ObjectFieldSelector(
    #                     api_version='v1',
    #                     field_path='metadata.name'))),
    #         client.V1EnvVar(
    #             name='NAMESPACE',
    #             value_from=client.V1EnvVarSource(
    #                 field_ref=client.V1ObjectFieldSelector(
    #                     api_version='v1',
    #                     field_path='metadata.namespace')))],
    #     command=["ansible-playbook", "member-cert.yml"])
    #
    # statefulset.spec.template.spec.init_containers = [tls_init_container]

    return statefulset
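A creation sketch, assuming a kubernetes client old enough to still ship the apps/v1beta1 API that V1beta1StatefulSet belongs to; the cluster_object literal is a minimal stand-in for a real custom resource:

# Minimal cluster object: missing spec keys fall back to the defaults above.
cluster_object = {'metadata': {'name': 'mongo', 'namespace': 'default'},
                  'spec': {'mongodb': {}}}
statefulset = get_statefulset_object(cluster_object)
client.AppsV1beta1Api().create_namespaced_stateful_set(
    namespace='default', body=statefulset)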
Example #12
def apply_rekcurd_to_kubernetes(project_id: int,
                                application_id: str,
                                service_level: str,
                                version: str,
                                insecure_host: str,
                                insecure_port: int,
                                replicas_default: int,
                                replicas_minimum: int,
                                replicas_maximum: int,
                                autoscale_cpu_threshold: str,
                                policy_max_surge: int,
                                policy_max_unavailable: int,
                                policy_wait_seconds: int,
                                container_image: str,
                                resource_request_cpu: str,
                                resource_request_memory: str,
                                resource_limit_cpu: str,
                                resource_limit_memory: str,
                                commit_message: str,
                                service_model_assignment: int,
                                service_git_url: str = "",
                                service_git_branch: str = "",
                                service_boot_script: str = "",
                                debug_mode: bool = False,
                                service_id: str = None,
                                is_creation_mode: bool = False,
                                display_name: str = None,
                                description: str = None,
                                kubernetes_models=None,
                                **kwargs) -> str:
    """
    kubectl apply
    :param project_id:
    :param application_id:
    :param service_level:
    :param version:
    :param insecure_host:
    :param insecure_port:
    :param replicas_default:
    :param replicas_minimum:
    :param replicas_maximum:
    :param autoscale_cpu_threshold:
    :param policy_max_surge:
    :param policy_max_unavailable:
    :param policy_wait_seconds:
    :param container_image:
    :param resource_request_cpu:
    :param resource_request_memory:
    :param resource_limit_cpu:
    :param resource_limit_memory:
    :param commit_message:
    :param service_model_assignment:
    :param service_git_url:
    :param service_git_branch:
    :param service_boot_script:
    :param debug_mode:
    :param service_id:
    :param is_creation_mode:
    :param display_name:
    :param description:
    :param kubernetes_models:
    :param kwargs:
    :return:
    """
    __num_retry = 5
    progress_deadline_seconds = \
        int(__num_retry*policy_wait_seconds*replicas_maximum/(policy_max_surge+policy_max_unavailable))
    if service_id is None:
        is_creation_mode = True
        service_id = uuid.uuid4().hex
    if kubernetes_models is None:
        kubernetes_models = db.session.query(KubernetesModel).filter(
            KubernetesModel.project_id == project_id).all()
    data_server_model: DataServerModel = db.session.query(
        DataServerModel).filter(
            DataServerModel.project_id == project_id).first_or_404()
    application_model: ApplicationModel = db.session.query(
        ApplicationModel).filter(
            ApplicationModel.application_id == application_id).first_or_404()
    application_name = application_model.application_name
    model_model: ModelModel = db.session.query(ModelModel).filter(
        ModelModel.model_id == service_model_assignment).first_or_404()

    from kubernetes import client
    try:
        git_secret = load_secret(project_id, application_id, service_level,
                                 GIT_SECRET_PREFIX)
    except Exception:
        git_secret = None
    volume_mounts = dict()
    volumes = dict()
    if git_secret:
        connector_name = "sec-git-name"
        secret_name = "sec-{}-{}".format(GIT_SECRET_PREFIX, application_id)
        volume_mounts = {
            'volume_mounts': [
                client.V1VolumeMount(name=connector_name,
                                     mount_path=GIT_SSH_MOUNT_DIR,
                                     read_only=True)
            ]
        }
        volumes = {
            'volumes': [
                client.V1Volume(name=connector_name,
                                secret=client.V1SecretVolumeSource(
                                    secret_name=secret_name,
                                    items=[
                                        client.V1KeyToPath(key=GIT_ID_RSA,
                                                           path=GIT_ID_RSA,
                                                           mode=GIT_SSH_MODE),
                                        client.V1KeyToPath(key=GIT_CONFIG,
                                                           path=GIT_CONFIG,
                                                           mode=GIT_SSH_MODE)
                                    ]))
            ]
        }

    for kubernetes_model in kubernetes_models:
        full_config_path = get_full_config_path(kubernetes_model.config_path)
        from kubernetes import config
        config.load_kube_config(full_config_path)

        pod_env = [
            client.V1EnvVar(name="REKCURD_SERVICE_UPDATE_FLAG",
                            value=commit_message),
            client.V1EnvVar(name="REKCURD_KUBERNETES_MODE", value="True"),
            client.V1EnvVar(name="REKCURD_DEBUG_MODE", value=str(debug_mode)),
            client.V1EnvVar(name="REKCURD_APPLICATION_NAME",
                            value=application_name),
            client.V1EnvVar(name="REKCURD_SERVICE_INSECURE_HOST",
                            value=insecure_host),
            client.V1EnvVar(name="REKCURD_SERVICE_INSECURE_PORT",
                            value=str(insecure_port)),
            client.V1EnvVar(name="REKCURD_SERVICE_ID", value=service_id),
            client.V1EnvVar(name="REKCURD_SERVICE_LEVEL", value=service_level),
            client.V1EnvVar(name="REKCURD_GRPC_PROTO_VERSION", value=version),
            client.V1EnvVar(name="REKCURD_MODEL_MODE",
                            value=data_server_model.data_server_mode.value),
            client.V1EnvVar(name="REKCURD_MODEL_FILE_PATH",
                            value=model_model.filepath),
            client.V1EnvVar(name="REKCURD_CEPH_ACCESS_KEY",
                            value=str(data_server_model.ceph_access_key
                                      or "xxx")),
            client.V1EnvVar(name="REKCURD_CEPH_SECRET_KEY",
                            value=str(data_server_model.ceph_secret_key
                                      or "xxx")),
            client.V1EnvVar(name="REKCURD_CEPH_HOST",
                            value=str(data_server_model.ceph_host or "xxx")),
            client.V1EnvVar(name="REKCURD_CEPH_PORT",
                            value=str(data_server_model.ceph_port or "1234")),
            client.V1EnvVar(name="REKCURD_CEPH_IS_SECURE",
                            value=str(data_server_model.ceph_is_secure
                                      or "False")),
            client.V1EnvVar(name="REKCURD_CEPH_BUCKET_NAME",
                            value=str(data_server_model.ceph_bucket_name
                                      or "xxx")),
            client.V1EnvVar(name="REKCURD_AWS_ACCESS_KEY",
                            value=str(data_server_model.aws_access_key
                                      or "xxx")),
            client.V1EnvVar(name="REKCURD_AWS_SECRET_KEY",
                            value=str(data_server_model.aws_secret_key
                                      or "xxx")),
            client.V1EnvVar(name="REKCURD_AWS_BUCKET_NAME",
                            value=str(data_server_model.aws_bucket_name
                                      or "xxx")),
            client.V1EnvVar(name="REKCURD_GCS_ACCESS_KEY",
                            value=str(data_server_model.gcs_access_key
                                      or "xxx")),
            client.V1EnvVar(name="REKCURD_GCS_SECRET_KEY",
                            value=str(data_server_model.gcs_secret_key
                                      or "xxx")),
            client.V1EnvVar(name="REKCURD_GCS_BUCKET_NAME",
                            value=str(data_server_model.gcs_bucket_name
                                      or "xxx")),
            client.V1EnvVar(name="REKCURD_SERVICE_GIT_URL",
                            value=service_git_url),
            client.V1EnvVar(name="REKCURD_SERVICE_GIT_BRANCH",
                            value=service_git_branch),
            client.V1EnvVar(name="REKCURD_SERVICE_BOOT_SHELL",
                            value=service_boot_script),
        ]
        """Namespace registration."""
        core_v1_api = client.CoreV1Api()
        try:
            core_v1_api.read_namespace(name=service_level)
        except Exception:
            api.logger.info("\"{}\" namespace created".format(service_level))
            v1_namespace = client.V1Namespace(
                api_version="v1",
                kind="Namespace",
                metadata=client.V1ObjectMeta(name=service_level))
            core_v1_api.create_namespace(body=v1_namespace)
        """Create/patch Deployment."""
        v1_deployment = client.V1Deployment(
            api_version="apps/v1",
            kind="Deployment",
            metadata=client.V1ObjectMeta(name="deploy-{0}".format(service_id),
                                         namespace=service_level,
                                         labels={
                                             "rekcurd-worker": "True",
                                             "id": application_id,
                                             "name": application_name,
                                             "sel": service_id
                                         }),
            spec=client.V1DeploymentSpec(
                min_ready_seconds=policy_wait_seconds,
                progress_deadline_seconds=progress_deadline_seconds,
                replicas=replicas_default,
                revision_history_limit=3,
                selector=client.V1LabelSelector(
                    match_labels={"sel": service_id}),
                strategy=client.V1DeploymentStrategy(
                    type="RollingUpdate",
                    rolling_update=client.V1RollingUpdateDeployment(
                        max_surge=policy_max_surge,
                        max_unavailable=policy_max_unavailable)),
                template=client.V1PodTemplateSpec(
                    metadata=client.V1ObjectMeta(
                        labels={
                            "rekcurd-worker": "True",
                            "id": application_id,
                            "name": application_name,
                            "sel": service_id
                        }),
                    spec=client.V1PodSpec(
                        affinity=client.V1Affinity(
                            pod_anti_affinity=client.V1PodAntiAffinity(
                                preferred_during_scheduling_ignored_during_execution=[
                                    client.V1WeightedPodAffinityTerm(
                                        pod_affinity_term=client.V1PodAffinityTerm(
                                            label_selector=client.V1LabelSelector(
                                                match_expressions=[
                                                    client.V1LabelSelectorRequirement(
                                                        key="id",
                                                        operator="In",
                                                        values=[service_id])
                                                ]),
                                            topology_key="kubernetes.io/hostname"),
                                        weight=100)
                                ])),
                        containers=[
                            client.V1Container(
                                env=pod_env,
                                image=container_image,
                                image_pull_policy="Always",
                                name=service_id,
                                ports=[
                                    client.V1ContainerPort(
                                        container_port=insecure_port)
                                ],
                                resources=client.V1ResourceRequirements(
                                    limits={
                                        "cpu": str(resource_limit_cpu),
                                        "memory": resource_limit_memory
                                    },
                                    requests={
                                        "cpu": str(resource_request_cpu),
                                        "memory": resource_request_memory
                                    }),
                                security_context=client.V1SecurityContext(
                                    privileged=True),
                                **volume_mounts)
                        ],
                        node_selector={"host": service_level},
                        **volumes))))
        apps_v1_api = client.AppsV1Api()
        if is_creation_mode:
            api.logger.info("Deployment created.")
            apps_v1_api.create_namespaced_deployment(body=v1_deployment,
                                                     namespace=service_level)
        else:
            api.logger.info("Deployment patched.")
            apps_v1_api.patch_namespaced_deployment(
                body=v1_deployment,
                name="deploy-{0}".format(service_id),
                namespace=service_level)
        """Create/patch Service."""
        v1_service = client.V1Service(
            api_version="v1",
            kind="Service",
            metadata=client.V1ObjectMeta(name="svc-{0}".format(service_id),
                                         namespace=service_level,
                                         labels={
                                             "rekcurd-worker": "True",
                                             "id": application_id,
                                             "name": application_name,
                                             "sel": service_id
                                         }),
            spec=client.V1ServiceSpec(
                ports=[
                    client.V1ServicePort(name="grpc-backend",
                                         port=insecure_port,
                                         protocol="TCP",
                                         target_port=insecure_port)
                ],
                selector={"sel": service_id}))
        core_v1_api = client.CoreV1Api()
        if is_creation_mode:
            api.logger.info("Service created.")
            core_v1_api.create_namespaced_service(namespace=service_level,
                                                  body=v1_service)
        else:
            api.logger.info("Service patched.")
            core_v1_api.patch_namespaced_service(
                namespace=service_level,
                name="svc-{0}".format(service_id),
                body=v1_service)
        """Create/patch Autoscaler."""
        v1_horizontal_pod_autoscaler = client.V1HorizontalPodAutoscaler(
            api_version="autoscaling/v1",
            kind="HorizontalPodAutoscaler",
            metadata=client.V1ObjectMeta(name="hpa-{0}".format(service_id),
                                         namespace=service_level,
                                         labels={
                                             "rekcurd-worker": "True",
                                             "id": application_id,
                                             "name": application_name,
                                             "sel": service_id
                                         }),
            spec=client.V1HorizontalPodAutoscalerSpec(
                max_replicas=replicas_maximum,
                min_replicas=replicas_minimum,
                scale_target_ref=client.V1CrossVersionObjectReference(
                    api_version="apps/v1",
                    kind="Deployment",
                    name="deploy-{0}".format(service_id)),
                target_cpu_utilization_percentage=autoscale_cpu_threshold))
        autoscaling_v1_api = client.AutoscalingV1Api()
        if is_creation_mode:
            api.logger.info("Autoscaler created.")
            autoscaling_v1_api.create_namespaced_horizontal_pod_autoscaler(
                namespace=service_level, body=v1_horizontal_pod_autoscaler)
        else:
            api.logger.info("Autoscaler patched.")
            autoscaling_v1_api.patch_namespaced_horizontal_pod_autoscaler(
                namespace=service_level,
                name="hpa-{0}".format(service_id),
                body=v1_horizontal_pod_autoscaler)
        """Create Istio ingress if this is the first application."""
        custom_object_api = client.CustomObjectsApi()
        try:
            custom_object_api.get_namespaced_custom_object(
                group="networking.istio.io",
                version="v1alpha3",
                namespace=service_level,
                plural="virtualservices",
                name="ing-vs-{0}".format(application_id),
            )
        except client.rest.ApiException:  # assume "not found": create the ingress VirtualService below
            ingress_virtual_service_body = {
                "apiVersion": "networking.istio.io/v1alpha3",
                "kind": "VirtualService",
                "metadata": {
                    "labels": {
                        "rekcurd-worker": "True",
                        "id": application_id,
                        "name": application_name
                    },
                    "name": "ing-vs-{0}".format(application_id),
                    "namespace": service_level
                },
                "spec": {
                    "hosts": ["*"],
                    "gateways": ["rekcurd-ingress-gateway"],
                    "http": [{
                        "match": [{
                            "headers": {
                                "x-rekcurd-application-name": {
                                    "exact": application_name
                                },
                                "x-rekcurd-sevice-level": {
                                    "exact": service_level
                                },
                                "x-rekcurd-grpc-version": {
                                    "exact": version
                                },
                            }
                        }],
                        "route": [{
                            "destination": {
                                "port": {
                                    "number": insecure_port
                                },
                                "host": "svc-{0}".format(service_id)
                            },
                            "weight": 100
                        }],
                        "retries": {
                            "attempts": 25,
                            "perTryTimeout": "1s"
                        }
                    }]
                }
            }
            api.logger.info("Istio created.")
            custom_object_api.create_namespaced_custom_object(
                group="networking.istio.io",
                version="v1alpha3",
                namespace=service_level,
                plural="virtualservices",
                body=ingress_virtual_service_body)
        """Add service model."""
        if is_creation_mode:
            if display_name is None:
                display_name = "{0}-{1}".format(service_level, service_id)
            service_model = ServiceModel(service_id=service_id,
                                         application_id=application_id,
                                         display_name=display_name,
                                         description=description,
                                         service_level=service_level,
                                         version=version,
                                         model_id=service_model_assignment,
                                         insecure_host=insecure_host,
                                         insecure_port=insecure_port)
            db.session.add(service_model)
            db.session.flush()
    """Finish."""
    return service_id
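The existence check above can be made stricter by distinguishing HTTP 404 from other failures; a minimal sketch as a hypothetical helper (the function name is an assumption, not part of the original):

from kubernetes import client
from kubernetes.client.rest import ApiException

def ingress_virtual_service_exists(namespace, application_id):
    """Hypothetical helper: True if the Istio VirtualService already exists."""
    custom_object_api = client.CustomObjectsApi()
    try:
        custom_object_api.get_namespaced_custom_object(
            group="networking.istio.io",
            version="v1alpha3",
            namespace=namespace,
            plural="virtualservices",
            name="ing-vs-{0}".format(application_id))
        return True
    except ApiException as e:
        if e.status == 404:  # genuinely missing; the caller should create it
            return False
        raise  # auth or connectivity errors must not be mistaken for "missing"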
def createJob(username,
              image,
              replicas,
              train_data=None,
              test_data=None,
              train_mode="cpu",
              is_host_network=False,
              ssh_port="22"):
    job_name = username + "-horovod"

    # TODO: fix the port number handling
    new_ssh_port = int(ssh_port)
    new_ssh_port += 1
    new_ssh_port = "{}".format(new_ssh_port)  # note: computed but never used below
    job = client.V1Job()
    job.metadata = client.V1ObjectMeta(name=job_name,
                                       labels={
                                           "app": "horovod",
                                           "user": username,
                                           "role": "master"
                                       })
    # Define the Job spec (the real pod template is attached at the end of this function)
    job_spec = client.V1JobSpec(template=client.V1PodTemplateSpec())
    # job_spec.metadata = client.V1ObjectMeta(name=job_name, labels={
    #     "app": "horovod",
    #     "user": username,
    #     "role": "master"
    # })
    # Define the Job spec's pod template
    # (restart_policy belongs on the pod spec, not on V1PodTemplateSpec; it is set below)
    pod_template_spec = client.V1PodTemplateSpec()
    pod_template_spec.metadata = client.V1ObjectMeta(name=job_name,
                                                     labels={
                                                         "app": "horovod",
                                                         "user": username,
                                                         "role": "master"
                                                     })

    # Define the pod spec; containers are attached below
    pod_spec = client.V1PodSpec(containers=[], restart_policy="OnFailure")
    # Define the container
    container = client.V1Container(name=job_name + "-master")

    container.image = image
    container.image_pull_policy = "IfNotPresent"
    container.env = [
        client.V1EnvVar(name="SSHPORT", value=ssh_port),
        client.V1EnvVar(name="USESECRETS", value="true"),
        # TODO: change this
        client.V1EnvVar(name="ENTRY_POINT", value="train.py"),
        client.V1EnvVar(name="JOB_NAME", value=username)
    ]
    container.ports = [client.V1ContainerPort(container_port=int(ssh_port))]
    container.volume_mounts = [
        client.V1VolumeMount(name=job_name + "-cm",
                             mount_path="/horovod/generated"),
        client.V1VolumeMount(name=job_name + "-secret",
                             mount_path="/etc/secret-volume",
                             read_only=True),
        client.V1VolumeMount(name=job_name + "-data",
                             mount_path="/horovod/data")
    ]
    container.command = ["/horovod/generated/run.sh"]

    cpu_mode_stub = ""
    if train_mode == "cpu":
        cpu_mode_stub = "ldconfig /usr/local/cuda/lib64/stubs;"

    # TODO: cpu, gpu, training code
    container.args = [
        "ldconfig /usr/local/cuda/lib64/stubs && mpirun -np {replicas} --hostfile /horovod/generated/hostfile\
            --mca orte_keep_fqdn_hostnames t --allow-run-as-root --display-map --tag-output\
            --timestamp-output sh -c '{cpu_mode_stub} python /horovod/data/train.py'"
        .format(replicas=replicas, cpu_mode_stub=cpu_mode_stub)
    ]

    pod_spec.volumes = [
        client.V1Volume(name=job_name + "-cm",
                        config_map=client.V1ConfigMapVolumeSource(
                            name=job_name,
                            items=[
                                client.V1KeyToPath(key="hostfile.config",
                                                   path="hostfile",
                                                   mode=438),
                                client.V1KeyToPath(
                                    key="master.waitWorkerReady",
                                    path="waitWorkerReady.sh",
                                    mode=365),
                                client.V1KeyToPath(key="master.run",
                                                   path="run.sh",
                                                   mode=365)
                            ])),
        client.V1Volume(name=job_name + "-secret",
                        secret=client.V1SecretVolumeSource(
                            secret_name=job_name,
                            default_mode=448,
                            items=[
                                client.V1KeyToPath(key="host-key",
                                                   path="id_rsa"),
                                client.V1KeyToPath(key="host-key-pub",
                                                   path="authorized_keys")
                            ])),
        client.V1Volume(name=job_name + "-data",
                        empty_dir=client.V1EmptyDirVolumeSource())
    ]

    # TODO: this is a node selector for now; decide whether to remove it and what to use instead
    # pod_spec.node_selector = {
    #     "node-role": "master"
    # }

    pod_spec.containers = [container]
    if is_host_network:
        pod_spec.host_network = True
        pod_spec.dns_policy = "ClusterFirstWithHostNet"

    # init container
    pod_spec.init_containers = [
        client.V1Container(
            name="wait-workers",
            image=image,
            image_pull_policy="IfNotPresent",
            env=[
                client.V1EnvVar(name="SSHPORT", value=ssh_port),
                client.V1EnvVar(name="USESECRETS", value="true")
            ],
            command=[
                "/horovod/generated/waitWorkerReady.sh",
                # TODO: re-set the S3 address.
            ],
            args=["/horovod/generated/hostfile"],
            volume_mounts=[
                client.V1VolumeMount(name=job_name + "-cm",
                                     mount_path="/horovod/generated"),
                client.V1VolumeMount(name=job_name + "-secret",
                                     mount_path="/etc/secret-volume",
                                     read_only=True),
                client.V1VolumeMount(name=job_name + "-data",
                                     mount_path="/horovod/data")
            ]),
        client.V1Container(
            name="download-data",
            image=image,
            image_pull_policy="IfNotPresent",
            command=["/bin/bash", "-c"],
            args=[
                "curl http://ywj-horovod.s3.ap-northeast-2.amazonaws.com/horovod/"
                + username + "/train.py > /horovod/data/train.py"
            ],
            volume_mounts=[
                client.V1VolumeMount(name=job_name + "-data",
                                     mount_path="/horovod/data")
            ])
    ]

    init_args = []
    # Download the training code
    init_args.append(
        "curl http://ywj-horovod.s3.ap-northeast-2.amazonaws.com/horovod/" +
        username + "/train.py > /horovod/data/train.py")

    if train_data is not None:
        init_args.append(
            "curl http://ywj-horovod.s3.ap-northeast-2.amazonaws.com/{username}/{train_data} > /horovod/data/{train_data}"
            .format(username=username, train_data=train_data))

    if test_data is not None:
        init_args.append(
            "curl http://ywj-horovod.s3.ap-northeast-2.amazonaws.com/{username}/{test_data} > /horovod/data/{test_data}"
            .format(username=username, test_data=test_data))

    # the full args list (train.py plus optional datasets) replaces the initial download command
    pod_spec.init_containers[1].args = init_args
    print(pod_spec.init_containers[1].args)

    pod_template_spec.spec = pod_spec

    job_spec.template = pod_template_spec
    job.spec = job_spec
    return job
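A minimal usage sketch for createJob; the kubeconfig loading, the example arguments, and the "default" namespace are assumptions, not part of the original:

from kubernetes import client, config

config.load_kube_config()  # or config.load_incluster_config() inside a cluster
job = createJob(username="alice", image="horovod/horovod:latest",
                replicas=2, train_data="train.csv")
client.BatchV1Api().create_namespaced_job(namespace="default", body=job)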
def createStatefulSet(username,
                      replicas,
                      image,
                      is_host_network=False,
                      ssh_port="22"):
    statefulset_name = username + "-horovod"

    statefulset = client.V1StatefulSet()
    #statefulset.api_version="apps/v1beta2"
    statefulset.metadata = client.V1ObjectMeta(name=statefulset_name,
                                               labels={
                                                   "app": "horovod",
                                                   "user": username,
                                                   "role": "worker"
                                               })

    label_selector = client.V1LabelSelector(match_labels={
        "app": "horovod",
        "user": username,
        "role": "worker"
    })

    # Define the pod template

    pod_template = client.V1PodTemplateSpec()
    pod_template.metadata = client.V1ObjectMeta(labels={
        "app": "horovod",
        "user": username,
        "role": "worker"
    })

    container = client.V1Container(name="worker")
    container.image = image
    container.image_pull_policy = "IfNotPresent"
    container.env = [
        client.V1EnvVar(name="SSHPORT", value=ssh_port),
        client.V1EnvVar(name="USESECRETS", value="true"),
        # TODO: change this
        client.V1EnvVar(name="ENTRY_POINT", value="train.py")
    ]
    container.ports = [client.V1ContainerPort(container_port=int(ssh_port))]  # use the configured SSH port instead of hardcoding 22
    container.volume_mounts = [
        client.V1VolumeMount(name=statefulset_name + "-cm",
                             mount_path="/horovod/generated"),
        client.V1VolumeMount(name=statefulset_name + "-secret",
                             mount_path="/etc/secret-volume",
                             read_only=True),
        client.V1VolumeMount(name=statefulset_name + "-data",
                             mount_path="/horovod/data")
    ]
    container.command = ["/horovod/generated/run.sh"]
    container.readiness_probe = client.V1Probe(
        _exec=client.V1ExecAction(command=["/horovod/generated/check.sh"]),
        initial_delay_seconds=1,
        period_seconds=2)

    pod_spec = client.V1PodSpec(containers=[container])

    # If host networking is enabled
    if is_host_network:
        pod_spec.host_network = True
        pod_spec.dns_policy = "ClusterFirstWithHostNet"

    pod_spec.volumes = [
        client.V1Volume(name=statefulset_name + "-cm",
                        config_map=client.V1ConfigMapVolumeSource(
                            name=statefulset_name,
                            items=[
                                client.V1KeyToPath(key="hostfile.config",
                                                   path="hostfile",
                                                   mode=438),
                                client.V1KeyToPath(key="ssh.readiness",
                                                   path="check.sh",
                                                   mode=365),
                                client.V1KeyToPath(key="worker.run",
                                                   path="run.sh",
                                                   mode=365)
                            ])),
        client.V1Volume(name=statefulset_name + "-secret",
                        secret=client.V1SecretVolumeSource(
                            secret_name=statefulset_name,
                            default_mode=448,
                            items=[
                                client.V1KeyToPath(key="host-key",
                                                   path="id_rsa"),
                                client.V1KeyToPath(key="host-key-pub",
                                                   path="authorized_keys")
                            ])),
        client.V1Volume(name=statefulset_name + "-data",
                        empty_dir=client.V1EmptyDirVolumeSource())
    ]
    pod_spec.subdomain = statefulset_name
    pod_spec.hostname = statefulset_name
    pod_spec.init_containers = [
        client.V1Container(
            name="download-data",
            image=image,
            image_pull_policy="IfNotPresent",
            command=["/bin/bash", "-c"],
            args=[
                "curl http://ywj-horovod.s3.ap-northeast-2.amazonaws.com/horovod/"
                + username + "/train.py > /horovod/data/train.py"
            ],
            volume_mounts=[
                client.V1VolumeMount(name=statefulset_name + "-data",
                                     mount_path="/horovod/data")
            ])
    ]
    pod_template.spec = pod_spec

    statefulset.spec = client.V1StatefulSetSpec(
        selector=label_selector,
        service_name=statefulset_name +
        "-worker",  # https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#pod-identity
        pod_management_policy="Parallel",
        replicas=replicas,
        template=pod_template)
    return statefulset
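Because service_name on a StatefulSet must point at a headless Service for stable pod DNS, a deployment sketch would create both; the helper name and the "default" namespace are assumptions:

def deploy_horovod_workers(username, replicas, image, namespace="default"):
    # Hypothetical helper: headless Service (clusterIP: None) for worker pod DNS
    headless_svc = client.V1Service(
        metadata=client.V1ObjectMeta(name=username + "-horovod-worker"),
        spec=client.V1ServiceSpec(
            cluster_ip="None",
            selector={"app": "horovod", "user": username, "role": "worker"},
            ports=[client.V1ServicePort(port=22, name="ssh")]))
    client.CoreV1Api().create_namespaced_service(namespace=namespace,
                                                 body=headless_svc)
    statefulset = createStatefulSet(username, replicas, image)
    client.AppsV1Api().create_namespaced_stateful_set(namespace=namespace,
                                                      body=statefulset)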
Example #15
    def __create_job_def(self, post_processing_required=False):
        # initialize the job def body
        self.inst_name = self.name
        if self.job_count > 1:
            self.inst_name = self.inst_name + '-' + str(self.job_count)
        self.job_count += 1
        job_def = client.V1Job(kind="Job")
        job_def.metadata = client.V1ObjectMeta(namespace=self.namespace,
                                               name=self.inst_name)

        # initialize job pieces
        self.job_containers = []
        volume_mounts = []
        volumes = []
        containers = []
        init_containers = []
        env_variables = []

        if not self.volume_name:
            # use the task name so it can be used across multiple jobs
            self.volume_name = self.name + '-pd'

        # build volume mounts
        volume_mounts.append(
            client.V1VolumeMount(mount_path=self.wrk_dir,
                                 name=self.volume_name))

        cpu_request_max = self.nodepool_info['max_cpu'] - self.cpu_reserve
        mem_request_max = self.nodepool_info['max_mem'] - self.mem_reserve

        # define resource limits/requests
        resource_def = client.V1ResourceRequirements(
            limits={
                'cpu': cpu_request_max,
                'memory': str(mem_request_max) + 'G'
            },
            requests={
                'cpu': cpu_request_max * .8,
                'memory': str(mem_request_max - 1) + 'G'
            })

        # update script task with job info
        if self.script_task:
            self.script_task.cpu_request = cpu_request_max * .8
            self.script_task.cpu_max = cpu_request_max
            self.script_task.memory_request = mem_request_max - 1
            self.script_task.memory_max = mem_request_max
            self.script_task.instance_name = self.inst_name
            self.script_task.force_standard = not self.preemptible
            self.script_task.pool_name = str(self.node_label)
            self.script_task.instance_type = str(
                self.nodepool_info["inst_type"])

        # place the job in the appropriate node pool
        node_label_dict = {'poolName': str(self.node_label)}

        # build volumes
        volumes.append(
            client.V1Volume(
                name=self.volume_name,
                persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource(
                    claim_name=self.pvc_name)))

        # incorporate configured persistent volumes if associated with the current task
        if self.extra_persistent_volumes:
            for pv in self.extra_persistent_volumes:
                if pv['task_prefix'] in self.name:
                    claim_name = pv["pvc_name"]
                    if 'dynamic' in pv and pv['dynamic']:
                        claim_name = (claim_name[:57] + '-' +
                                      Platform.generate_unique_id(id_len=5))
                    # need to add the extra persistent volume
                    volume_mounts.append(
                        client.V1VolumeMount(mount_path=pv["path"],
                                             name=pv['volume_name'],
                                             read_only=pv['read_only']))
                    volumes.append(
                        client.V1Volume(
                            name=pv['volume_name'],
                            persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource(
                                claim_name=claim_name)))

                    # specify volumes for script task
                    if self.script_task:
                        if 'dynamic' in pv and pv['dynamic']:
                            self.script_task.extra_volumes.append({
                                "path": pv["path"],
                                "name": pv["volume_name"],
                                "storage": pv["size"],
                                "read_only": pv["read_only"],
                                "claim_name": claim_name,
                                "command": pv["copy_command"],
                                "dynamic": True
                            })
                        else:
                            self.script_task.extra_volumes.append({
                                "path": pv["path"],
                                "name": pv["volume_name"],
                                "read_only": pv["read_only"],
                                "claim_name": claim_name,
                                "dynamic": False
                            })

        # incorporate configured secrets
        if self.gcp_secret_configured:
            volume_mounts.append(
                client.V1VolumeMount(
                    mount_path="/etc/cloud_conductor/gcp.json",
                    sub_path="gcp.json",
                    name="secret-volume",
                    read_only=True))
            volumes.append(
                client.V1Volume(name="secret-volume",
                                secret=client.V1SecretVolumeSource(
                                    secret_name="cloud-conductor-config",
                                    items=[
                                        client.V1KeyToPath(key="gcp_json",
                                                           path="gcp.json")
                                    ])))
            env_variables.append(
                client.V1EnvVar(name='GOOGLE_APPLICATION_CREDENTIALS',
                                value='/etc/cloud_conductor/gcp.json'))
            env_variables.append(
                client.V1EnvVar(name='RCLONE_CONFIG_GS_TYPE',
                                value='google cloud storage'))
            env_variables.append(
                client.V1EnvVar(name='RCLONE_CONFIG_GS_SERVICE_ACCOUNT_FILE',
                                value='$GOOGLE_APPLICATION_CREDENTIALS'))
            env_variables.append(
                client.V1EnvVar(name='RCLONE_CONFIG_GS_OBJECT_ACL',
                                value='projectPrivate'))
            env_variables.append(
                client.V1EnvVar(name='RCLONE_CONFIG_GS_BUCKET_ACL',
                                value='projectPrivate'))

        if self.aws_secret_configured:
            env_variables.append(
                client.V1EnvVar(
                    name='AWS_ACCESS_KEY_ID',
                    value_from=client.V1EnvVarSource(
                        secret_key_ref=client.V1SecretKeySelector(
                            name='cloud-conductor-config', key='aws_id'))))
            env_variables.append(
                client.V1EnvVar(
                    name='AWS_SECRET_ACCESS_KEY',
                    value_from=client.V1EnvVarSource(
                        secret_key_ref=client.V1SecretKeySelector(
                            name='cloud-conductor-config', key='aws_access'))))
            env_variables.append(
                client.V1EnvVar(name='RCLONE_CONFIG_S3_TYPE', value='s3'))
            env_variables.append(
                client.V1EnvVar(
                    name='RCLONE_CONFIG_S3_ACCESS_KEY_ID',
                    value_from=client.V1EnvVarSource(
                        secret_key_ref=client.V1SecretKeySelector(
                            name='cloud-conductor-config', key='aws_id'))))
            env_variables.append(
                client.V1EnvVar(
                    name='RCLONE_CONFIG_S3_SECRET_ACCESS_KEY',
                    value_from=client.V1EnvVarSource(
                        secret_key_ref=client.V1SecretKeySelector(
                            name='cloud-conductor-config', key='aws_access'))))

        storage_image = 'gcr.io/cloud-builders/gsutil'
        storage_tasks = ['mkdir_', 'grant_']
        container_name_list = []

        for k, v in self.processes.items():
            # if the process is for storage (i.e. mkdir, etc.)
            entrypoint = ["/bin/bash", "-c"]
            if any(x in k for x in storage_tasks) or not v['docker_image']:
                container_image = storage_image
            else:
                container_image = v['docker_image']
                if (v['docker_entrypoint'] is not None
                        and v['original_cmd'].find(v['docker_entrypoint']) == -1):
                    v['original_cmd'] = (v['docker_entrypoint'] + ' ' +
                                         v['original_cmd'])
                if 'rclone' in container_image:
                    v['original_cmd'] = v['original_cmd'].replace(
                        "|&", "2>&1 |")
                    entrypoint = ["/bin/sh", "-c"]
            args = v['original_cmd']
            if not isinstance(args, list):
                args = [v['original_cmd'].replace("sudo ", "")]
            args = " && ".join(args)
            args = args.replace("\n", " ")
            args = args.replace("java.io.tmpdir=/tmp/",
                                "java.io.tmpdir=/data/tmp/")

            if "awk " in args:
                args = re.sub("'\"'\"'", "'", args)

            if "gsutil" in args:
                args = "gcloud auth activate-service-account --key-file $GOOGLE_APPLICATION_CREDENTIALS && sleep 10; " + args

            # add in pipe error handling
            # if "copy_input" in k or "copy_output" in k:
            #     args = "set -o pipefail && " + args

            logging.debug(f"({self.name}) Command for task {k} is : {args}")

            # format the container name and roll call to logging
            container_name = k.replace("_", "-").replace(".", "-").lower()
            formatted_container_name = (container_name[:57] + '-' +
                                        Platform.generate_unique_id(id_len=5))
            while formatted_container_name in container_name_list:
                # make sure all container names are unique
                formatted_container_name = (container_name[:57] + '-' +
                                            Platform.generate_unique_id(id_len=5))
            container_name_list.append(formatted_container_name)

            # args = f">&2 echo STARTING TASK {container_name} && " + args

            containers.append(
                client.V1Container(
                    # lifecycle=client.V1Lifecycle(post_start=post_start_handler),
                    image=container_image,
                    command=entrypoint,
                    args=[args],
                    name=formatted_container_name,
                    volume_mounts=volume_mounts,
                    env=env_variables,
                    resources=resource_def,
                    image_pull_policy='IfNotPresent'))

            if self.script_task and container_name not in self.script_task.commands:
                self.script_task.commands[container_name] = ({
                    "name": formatted_container_name,
                    "docker_image": container_image,
                    "entrypoint": entrypoint,
                    "args": [args]
                })

        job_spec = dict(backoff_limit=self.default_num_cmd_retries)

        self.job_containers = containers

        # Run jobs in order using init_containers
        # See https://kubernetes.io/docs/concepts/workloads/pods/init-containers/
        if len(containers) > 1:
            init_containers = containers[:-1]
            containers = [containers[-1]]
        else:
            init_containers = None

        # define the pod spec
        job_template = client.V1PodTemplateSpec()
        job_labels = {}
        job_labels[self.inst_name] = 'CC-Job'
        # add annotation to prevent autoscaler from killing nodes running jobs
        annotations = {
            'cluster-autoscaler.kubernetes.io/safe-to-evict': 'false'
        }
        job_template.metadata = client.V1ObjectMeta(labels=job_labels,
                                                    annotations=annotations)
        job_template.spec = client.V1PodSpec(
            init_containers=init_containers,
            containers=containers,
            volumes=volumes,
            restart_policy='Never',
            termination_grace_period_seconds=self.termination_seconds,
            node_selector=node_label_dict)

        job_def.spec = client.V1JobSpec(template=job_template, **job_spec)

        if self.script_task:
            self.script_task.num_retries = self.default_num_cmd_retries
            for k, v in job_labels.items():
                self.script_task.labels.append({"key": k, "value": v})
            for k, v in annotations.items():
                self.script_task.annotations.append({"key": k, "value": v})

        return job_def
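The ordering trick above (every command but the last becomes an init container) can be isolated into a small sketch; the helper name and image are hypothetical:

from kubernetes import client

def chained_pod_spec(commands, volumes=None):
    """Hypothetical helper: run shell commands strictly in order by making
    all but the last one init containers (init containers run sequentially)."""
    steps = [
        client.V1Container(name="step-{}".format(i),
                           image="ubuntu:22.04",  # assumed image
                           command=["/bin/bash", "-c"],
                           args=[cmd])
        for i, cmd in enumerate(commands)
    ]
    return client.V1PodSpec(init_containers=steps[:-1],
                            containers=[steps[-1]],
                            volumes=volumes,
                            restart_policy="Never")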
Example #16
def create_deployment(apps_v1_api, username, token, gpu):
    name = 'jlab-{}'.format(username)
    try:
        init_container = client.V1Container(
            name='{}-init'.format(name),
            image="ubuntu:18.04",
            image_pull_policy="IfNotPresent",
            command=["/bin/sh"],
            args=["-c", "chown 1001:1001 /persistent_volume"],
            volume_mounts=[
                client.V1VolumeMount(
                    name='persistent-volume',
                    mount_path="/persistent_volume",
                    sub_path='{}/jupyter'.format(username)
                )
            ]
        )
        if gpu:
            limits = {'nvidia.com/gpu': 1}
        else:
            limits = None
        container = client.V1Container(
            name=name,
            image=envvars.DOCKER_IMAGE_JLAB_SERVER,
            resources=client.V1ResourceRequirements(
                limits=limits
            ),
            image_pull_policy="Always",
            ports=[client.V1ContainerPort(container_port=8888)],
            env=[
                client.V1EnvVar(
                    name='DES_USER',
                    value=username
                ),
                client.V1EnvVar(
                    name='PIP_TARGET',
                    value='/home/jovyan/work/.pip'
                ),
                client.V1EnvVar(
                    name='PYTHONPATH',
                    value='/home/jovyan/work/.pip'
                )
            ],
            volume_mounts=[
                client.V1VolumeMount(
                    name='jupyter-config',
                    mount_path="/home/jovyan/.jupyter/"
                ),
                client.V1VolumeMount(
                    name='persistent-volume',
                    mount_path="/home/jovyan/jobs/cutout",
                    sub_path='{}/cutout'.format(username)
                ),
                client.V1VolumeMount(
                    name='persistent-volume',
                    mount_path="/home/jovyan/jobs/query",
                    sub_path='{}/query'.format(username)
                ),
                client.V1VolumeMount(
                    name='persistent-volume',
                    mount_path="/home/jovyan/work",
                    sub_path='{}/jupyter'.format(username)
                )
            ]
        )
        volume_config = client.V1Volume(
            name='jupyter-config',
            config_map=client.V1ConfigMapVolumeSource(
                name=name,
                items=[client.V1KeyToPath(
                    key=name,
                    path="jupyter_notebook_config.py"
                )]
            )
        )
        volume_persistent = client.V1Volume(
            name='persistent-volume',
            persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource(
                claim_name=envvars.PVC_NAME_BASE
            )
        )
        # Template
        template = client.V1PodTemplateSpec(
            metadata=client.V1ObjectMeta(labels={"app": name}),
            spec=client.V1PodSpec(
                image_pull_secrets=[
                    client.V1LocalObjectReference(name='registry-auth')
                ],
                init_containers=[init_container],
                containers=[container],
                volumes=[volume_config, volume_persistent],
                node_selector={'gpu': '{}'.format(gpu).lower()}))
        # Spec
        spec = client.V1DeploymentSpec(
            replicas=1,
            template=template,
            selector=client.V1LabelSelector(
                match_labels=dict({'app': name})
            )
        )
        # Deployment
        deployment = client.V1Deployment(
            api_version="apps/v1",
            kind="Deployment",
            metadata=client.V1ObjectMeta(name=name),
            spec=spec)
        # Creation of the Deployment in specified namespace
        api_response = apps_v1_api.create_namespaced_deployment(
            namespace=namespace, body=deployment
        )
        # logger.info('Deployment created:\n{}'.format(api_response))
    except ApiException as e:
        error_msg = str(e).strip()
        logger.error(error_msg)
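To actually reach the notebook, the deployment is usually paired with a Service on port 8888; a sketch using the same "app" label, where the helper name is an assumption:

def create_jlab_service(core_v1_api, username, namespace):
    # Hypothetical companion to create_deployment: expose the pod's port 8888
    name = 'jlab-{}'.format(username)
    service = client.V1Service(
        metadata=client.V1ObjectMeta(name=name),
        spec=client.V1ServiceSpec(
            selector={'app': name},
            ports=[client.V1ServicePort(port=8888, target_port=8888)]))
    core_v1_api.create_namespaced_service(namespace=namespace, body=service)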
Example #17
def get_statefulset_object(cluster_object):
    name = cluster_object['metadata']['name']
    namespace = cluster_object['metadata']['namespace']

    try:
        replicas = cluster_object['spec']['mongodb']['replicas']
    except KeyError:
        replicas = 3

    try:
        mongodb_limit_cpu = \
            cluster_object['spec']['mongodb']['mongodb_limit_cpu']
    except KeyError:
        mongodb_limit_cpu = '100m'

    try:
        mongodb_limit_memory = \
            cluster_object['spec']['mongodb']['mongodb_limit_memory']
    except KeyError:
        mongodb_limit_memory = '64Mi'

    try:
        hard_pod_anti_affinity = \
            cluster_object['spec']['mongodb']['hard_pod_anti_affinity']
    except KeyError:
        hard_pod_anti_affinity = True

    statefulset = client.V1beta1StatefulSet()  # note: the v1beta1 models exist only in older python clients

    # Metadata
    statefulset.metadata = client.V1ObjectMeta(
        name=name, namespace=namespace, labels=get_default_labels(name=name))

    # Spec
    statefulset.spec = client.V1beta1StatefulSetSpec(
        replicas=replicas,
        service_name=name,
        template=client.V1PodTemplateSpec())

    statefulset.spec.template.metadata = client.V1ObjectMeta(
        labels=get_default_labels(name=name))

    statefulset.spec.template.spec = client.V1PodSpec(containers=[])

    pod_affinity_term = client.V1PodAffinityTerm(
        topology_key='kubernetes.io/hostname',
        label_selector=client.V1LabelSelector(match_expressions=[
            client.V1LabelSelectorRequirement(
                key='cluster', operator='In', values=[name])
        ]))

    pod_anti_affinity = client.V1PodAntiAffinity(
        required_during_scheduling_ignored_during_execution=[
            pod_affinity_term
        ])

    if not hard_pod_anti_affinity:
        pod_anti_affinity = client.V1PodAntiAffinity(
            preferred_during_scheduling_ignored_during_execution=[
                client.V1WeightedPodAffinityTerm(
                    weight=100, pod_affinity_term=pod_affinity_term)
            ])

    statefulset.spec.template.spec.affinity = client.V1Affinity(
        pod_anti_affinity=pod_anti_affinity)

    # MongoDB container
    mongodb_port = client.V1ContainerPort(name='mongodb',
                                          container_port=27017,
                                          protocol='TCP')
    mongodb_tls_volumemount = client.V1VolumeMount(
        name='mongo-tls', read_only=True, mount_path='/etc/ssl/mongod')
    mongodb_data_volumemount = client.V1VolumeMount(name='mongo-data',
                                                    read_only=False,
                                                    mount_path='/data/db')
    mongodb_resources = client.V1ResourceRequirements(
        limits={
            'cpu': mongodb_limit_cpu,
            'memory': mongodb_limit_memory
        },
        requests={
            'cpu': mongodb_limit_cpu,
            'memory': mongodb_limit_memory
        })
    mongodb_container = client.V1Container(
        name='mongod',
        env=[
            client.V1EnvVar(
                name='POD_IP',
                value_from=client.V1EnvVarSource(
                    field_ref=client.V1ObjectFieldSelector(
                        api_version='v1', field_path='status.podIP')))
        ],
        command=[
            'mongod', '--auth', '--replSet', name, '--sslMode', 'requireSSL',
            '--clusterAuthMode', 'x509', '--sslPEMKeyFile',
            '/etc/ssl/mongod/mongod.pem', '--sslCAFile',
            '/etc/ssl/mongod/ca.pem', '--bind_ip', '127.0.0.1,$(POD_IP)'
        ],
        image='mongo:3.6.4',
        ports=[mongodb_port],
        volume_mounts=[mongodb_tls_volumemount, mongodb_data_volumemount],
        resources=mongodb_resources)

    # Metrics container
    metrics_port = client.V1ContainerPort(name='metrics',
                                          container_port=9001,
                                          protocol='TCP')
    metrics_resources = client.V1ResourceRequirements(
        limits={'cpu': '50m', 'memory': '16Mi'},
        requests={'cpu': '50m', 'memory': '16Mi'})
    metrics_secret_name = '{}-monitoring-credentials'.format(name)
    metrics_username_env_var = client.V1EnvVar(
        name='MONGODB_MONITORING_USERNAME',
        value_from=client.V1EnvVarSource(
            secret_key_ref=client.V1SecretKeySelector(name=metrics_secret_name,
                                                      key='username')))
    metrics_password_env_var = client.V1EnvVar(
        name='MONGODB_MONITORING_PASSWORD',
        value_from=client.V1EnvVarSource(
            secret_key_ref=client.V1SecretKeySelector(name=metrics_secret_name,
                                                      key='password')))
    metrics_container = client.V1Container(
        name='prometheus-exporter',
        image='quay.io/kubestack/prometheus-mongodb-exporter:latest',
        command=[
            '/bin/sh', '-c',
            '/bin/mongodb_exporter --mongodb.uri mongodb://${MONGODB_MONITORING_USERNAME}:${MONGODB_MONITORING_PASSWORD}@127.0.0.1:27017/admin --mongodb.tls-cert /etc/ssl/mongod/mongod.pem --mongodb.tls-ca /etc/ssl/mongod/ca.pem'
        ],  # flake8: noqa
        ports=[metrics_port],
        resources=metrics_resources,
        volume_mounts=[mongodb_tls_volumemount],
        env=[metrics_username_env_var, metrics_password_env_var])

    statefulset.spec.template.spec.containers = [
        mongodb_container, metrics_container
    ]

    ca_volume = client.V1Volume(name='mongo-ca',
                                secret=client.V1SecretVolumeSource(
                                    secret_name='{}-ca'.format(name),
                                    items=[
                                        client.V1KeyToPath(key='ca.pem',
                                                           path='ca.pem'),
                                        client.V1KeyToPath(key='ca-key.pem',
                                                           path='ca-key.pem')
                                    ]))
    tls_volume = client.V1Volume(name='mongo-tls',
                                 empty_dir=client.V1EmptyDirVolumeSource())
    data_volume = client.V1Volume(name='mongo-data',
                                  empty_dir=client.V1EmptyDirVolumeSource())
    statefulset.spec.template.spec.volumes = [
        ca_volume, tls_volume, data_volume
    ]

    # Init container
    tls_init_ca_volumemount = client.V1VolumeMount(
        name='mongo-ca', read_only=True, mount_path='/etc/ssl/mongod-ca')
    tls_init_mongodb_tls_volumemount = client.V1VolumeMount(
        name='mongo-tls', read_only=False, mount_path='/etc/ssl/mongod')
    tls_init_container = client.V1Container(
        name="cert-init",
        image="quay.io/kubestack/mongodb-init:latest",
        volume_mounts=[
            tls_init_ca_volumemount, tls_init_mongodb_tls_volumemount
        ],
        env=[
            client.V1EnvVar(
                name='METADATA_NAME',
                value_from=client.V1EnvVarSource(
                    field_ref=client.V1ObjectFieldSelector(
                        api_version='v1', field_path='metadata.name'))),
            client.V1EnvVar(
                name='NAMESPACE',
                value_from=client.V1EnvVarSource(
                    field_ref=client.V1ObjectFieldSelector(
                        api_version='v1', field_path='metadata.namespace')))
        ],
        command=["ansible-playbook", "member-cert.yml"])

    statefulset.spec.template.spec.init_containers = [tls_init_container]

    return statefulset
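A usage sketch; since V1beta1StatefulSet is only present in older python clients (current releases ship only V1StatefulSet/AppsV1Api), this assumes such a client version, and the cluster dict values are illustrative:

from kubernetes import client, config

config.load_kube_config()
cluster = {'metadata': {'name': 'mongo', 'namespace': 'default'},
           'spec': {}}  # missing keys fall back to the defaults above
statefulset = get_statefulset_object(cluster)
client.AppsV1beta1Api().create_namespaced_stateful_set(namespace='default',
                                                       body=statefulset)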