def test_construct_pod(self, mock_uuid):
    path = sys.path[0] + '/tests/kubernetes/pod_generator_base_with_secrets.yaml'
    worker_config = PodGenerator.deserialize_model_file(path)
    mock_uuid.return_value = self.static_uuid
    executor_config = k8s.V1Pod(
        spec=k8s.V1PodSpec(
            containers=[
                k8s.V1Container(
                    name='',
                    resources=k8s.V1ResourceRequirements(limits={'cpu': '1m', 'memory': '1G'}),
                )
            ]
        )
    )
    result = PodGenerator.construct_pod(
        dag_id=self.dag_id,
        task_id=self.task_id,
        pod_id='pod_id',
        kube_image='airflow_image',
        try_number=self.try_number,
        date=self.execution_date,
        args=['command'],
        pod_override_object=executor_config,
        base_worker_pod=worker_config,
        namespace='test_namespace',
        scheduler_job_id='uuid',
    )
    expected = self.expected
    expected.metadata.labels = self.labels
    expected.metadata.labels['app'] = 'myapp'
    expected.metadata.annotations = self.annotations
    expected.metadata.name = 'pod_id.' + self.static_uuid.hex
    expected.metadata.namespace = 'test_namespace'
    expected.spec.containers[0].args = ['command']
    expected.spec.containers[0].image = 'airflow_image'
    expected.spec.containers[0].resources = {'limits': {'cpu': '1m', 'memory': '1G'}}
    expected.spec.containers[0].env.append(
        k8s.V1EnvVar(name="AIRFLOW_IS_K8S_EXECUTOR_POD", value='True')
    )
    result_dict = self.k8s_client.sanitize_for_serialization(result)
    expected_dict = self.k8s_client.sanitize_for_serialization(self.expected)
    assert expected_dict == result_dict
def test_init_environment_using_git_sync_ssh_without_known_hosts(self):
    # Tests the init environment created with git-sync SSH authentication option is correct
    # without known hosts file
    self.kube_config.airflow_configmap = 'airflow-configmap'
    self.kube_config.git_ssh_secret_name = 'airflow-secrets'
    self.kube_config.git_ssh_known_hosts_configmap_name = None
    self.kube_config.dags_volume_claim = None
    self.kube_config.dags_volume_host = None
    self.kube_config.dags_in_image = None

    worker_config = WorkerConfiguration(self.kube_config)
    init_containers = worker_config._get_init_containers()

    self.assertTrue(init_containers)  # check not empty
    env = init_containers[0].env

    self.assertIn(k8s.V1EnvVar(name='GIT_SSH_KEY_FILE', value='/etc/git-secret/ssh'), env)
    self.assertIn(k8s.V1EnvVar(name='GIT_SYNC_ADD_USER', value='true'), env)
    self.assertIn(k8s.V1EnvVar(name='GIT_KNOWN_HOSTS', value='false'), env)
    self.assertIn(k8s.V1EnvVar(name='GIT_SYNC_SSH', value='true'), env)
def test_make_pod_git_sync_credentials_secret(self):
    # Tests the pod created with git_sync_credentials_secret will get into the init container
    self.kube_config.git_sync_credentials_secret = 'airflow-git-creds-secret'
    self.kube_config.dags_volume_claim = None
    self.kube_config.dags_volume_host = None
    self.kube_config.dags_in_image = None
    self.kube_config.worker_fs_group = None
    self.kube_config.git_dags_folder_mount_point = 'dags'
    self.kube_config.git_sync_dest = 'repo'
    self.kube_config.git_subpath = 'path'

    worker_config = WorkerConfiguration(self.kube_config)
    pod = worker_config.as_pod()

    username_env = k8s.V1EnvVar(
        name='GIT_SYNC_USERNAME',
        value_from=k8s.V1EnvVarSource(
            secret_key_ref=k8s.V1SecretKeySelector(
                name=self.kube_config.git_sync_credentials_secret,
                key='GIT_SYNC_USERNAME',
            )
        ),
    )
    password_env = k8s.V1EnvVar(
        name='GIT_SYNC_PASSWORD',
        value_from=k8s.V1EnvVarSource(
            secret_key_ref=k8s.V1SecretKeySelector(
                name=self.kube_config.git_sync_credentials_secret,
                key='GIT_SYNC_PASSWORD',
            )
        ),
    )

    self.assertIn(
        username_env,
        pod.spec.init_containers[0].env,
        'The username env for git credentials did not get into the init container',
    )
    self.assertIn(
        password_env,
        pod.spec.init_containers[0].env,
        'The password env for git credentials did not get into the init container',
    )
def test_env_vars(self):
    k = KubernetesPodOperator(
        namespace="default",
        image="ubuntu:16.04",
        cmds=["bash", "-cx"],
        arguments=["echo 10"],
        env_vars=[k8s.V1EnvVar(name="{{ bar }}", value="{{ foo }}")],
        labels={"foo": "bar"},
        name="test",
        task_id="task",
        in_cluster=False,
        do_xcom_push=False,
    )
    k.render_template_fields(context={"foo": "footemplated", "bar": "bartemplated"})
    assert k.env_vars[0].value == "footemplated"
    assert k.env_vars[0].name == "bartemplated"
def convert_env_vars(env_vars) -> List[k8s.V1EnvVar]:
    """
    Converts a dictionary of env vars into a list of k8s.V1EnvVar objects.

    :param env_vars: mapping of env var names to values, or an already-built
        list of k8s.V1EnvVar objects
    :return: list of k8s.V1EnvVar
    """
    if isinstance(env_vars, dict):
        res = []
        for k, v in env_vars.items():
            res.append(k8s.V1EnvVar(name=k, value=v))
        return res
    elif isinstance(env_vars, list):
        return env_vars
    else:
        raise AirflowException(f"Expected dict or list, got {type(env_vars)}")
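# Usage sketch for convert_env_vars above (hedged: assumes the function and the
# kubernetes.client.models alias `k8s` are importable as shown). A dict becomes a
# list of V1EnvVar objects; a list passes through unchanged.
converted = convert_env_vars({"LOG_LEVEL": "debug"})
assert converted == [k8s.V1EnvVar(name="LOG_LEVEL", value="debug")]
assert convert_env_vars(converted) == converted  # list input is returned as-is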
def test_pod_template_file_with_overrides_system(self):
    fixture = sys.path[0] + '/tests/kubernetes/basic_pod.yaml'
    k = KubernetesPodOperator(
        task_id="task" + self.get_current_task_name(),
        labels={"foo": "bar", "fizz": "buzz"},
        env_vars={"env_name": "value"},
        in_cluster=False,
        pod_template_file=fixture,
        do_xcom_push=True,
    )

    context = create_context(k)
    result = k.execute(context)
    self.assertIsNotNone(result)
    self.assertEqual(k.pod.metadata.labels, {'fizz': 'buzz', 'foo': 'bar'})
    self.assertEqual(
        k.pod.spec.containers[0].env,
        [k8s.V1EnvVar(name="env_name", value="value")],
    )
    self.assertDictEqual(result, {"hello": "world"})
def mock_kubernetes_read_namespaced_pod(*_args, **_kwargs):
    """Represents the mocked output of kubernetes.client.read_namespaced_pod"""
    return models.V1Pod(
        metadata=models.V1ObjectMeta(
            namespace="default",
            name="gordo-test-pod-name-1234",
            labels={"app": "gordo-model-builder"},
        ),
        status=models.V1PodStatus(phase="Running"),
        spec=models.V1PodSpec(
            containers=[
                models.V1Container(
                    name="some-generated-test-container-name",
                    env=[models.V1EnvVar(name="MACHINE_NAME", value="test-machine-name")],
                )
            ]
        ),
    )
def test_make_pod_git_sync_rev(self):
    # Tests that the git_sync_rev setting gets into the init container of the created pod
    self.kube_config.git_sync_rev = 'sampletag'
    self.kube_config.dags_volume_claim = None
    self.kube_config.dags_volume_host = None
    self.kube_config.dags_in_image = None
    self.kube_config.worker_fs_group = None
    self.kube_config.git_dags_folder_mount_point = 'dags'
    self.kube_config.git_sync_dest = 'repo'
    self.kube_config.git_subpath = 'path'

    worker_config = WorkerConfiguration(self.kube_config)
    pod = worker_config.as_pod()

    rev_env = k8s.V1EnvVar(
        name='GIT_SYNC_REV',
        value=self.kube_config.git_sync_rev,
    )

    self.assertIn(
        rev_env,
        pod.spec.init_containers[0].env,
        'The git_sync_rev env did not get into the init container',
    )
def test_env_vars(self):
    # WHEN
    env_vars = [k8s.V1EnvVar(name="{{ bar }}", value='{{ foo }}')]

    from tests.models import DEFAULT_DATE

    with DAG("test-dag", start_date=DEFAULT_DATE):
        k = KubernetesPodOperator(
            namespace='default',
            image="ubuntu:16.04",
            cmds=["bash", "-cx"],
            arguments=["echo 10"],
            env_vars=env_vars,
            labels={"foo": "bar"},
            name="test",
            task_id="task",
            in_cluster=False,
            do_xcom_push=False,
        )

    k.render_template_fields(context={"foo": "footemplated", "bar": "bartemplated"})
    assert k.env_vars[0].value == "footemplated"
    assert k.env_vars[0].name == "bartemplated"
def to_env_secret(self) -> k8s.V1EnvVar:
    """Stores the secret as an environment variable"""
    return k8s.V1EnvVar(
        name=self.deploy_target,
        value_from=k8s.V1EnvVarSource(
            secret_key_ref=k8s.V1SecretKeySelector(name=self.secret, key=self.key)
        ),
    )
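# Usage sketch for to_env_secret above (hedged: mirrors the test_to_env_secret
# snippet later in this collection, assuming Airflow's Secret helper, which
# upper-cases the deploy target into the env var name).
secret = Secret('env', 'name', 'secret', 'key')
env_var = secret.to_env_secret()
# env_var.name == 'NAME'; env_var.value_from.secret_key_ref points at secret/key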
def test_pod_template_file_override_in_executor_config(self, mock_get_kube_client, mock_run_pod_async):
    current_folder = pathlib.Path(__file__).parent.absolute()
    template_file = str(
        (current_folder / "kubernetes_executor_template_files" / "basic_template.yaml").absolute()
    )

    mock_kube_client = mock.patch('kubernetes.client.CoreV1Api', autospec=True)
    mock_get_kube_client.return_value = mock_kube_client

    with conf_vars({('kubernetes', 'pod_template_file'): ''}):
        executor = self.kubernetes_executor
        executor.start()

        assert executor.event_buffer == {}
        assert executor.task_queue.empty()

        execution_date = datetime.utcnow()

        executor.execute_async(
            key=('dag', 'task', execution_date, 1),
            queue=None,
            command=['airflow', 'tasks', 'run', 'true', 'some_parameter'],
            executor_config={
                "pod_template_file": template_file,
                "pod_override": k8s.V1Pod(
                    metadata=k8s.V1ObjectMeta(labels={"release": "stable"}),
                    spec=k8s.V1PodSpec(
                        containers=[k8s.V1Container(name="base", image="airflow:3.6")],
                    ),
                ),
            },
        )

        assert not executor.task_queue.empty()
        task = executor.task_queue.get_nowait()
        _, _, expected_executor_config, expected_pod_template_file = task

        # Test that the correct values have been put to queue
        assert expected_executor_config.metadata.labels == {'release': 'stable'}
        assert expected_pod_template_file == template_file

        self.kubernetes_executor.kube_scheduler.run_next(task)
        mock_run_pod_async.assert_called_once_with(
            k8s.V1Pod(
                api_version="v1",
                kind="Pod",
                metadata=k8s.V1ObjectMeta(
                    name=mock.ANY,
                    namespace="default",
                    annotations={
                        'dag_id': 'dag',
                        'execution_date': execution_date.isoformat(),
                        'task_id': 'task',
                        'try_number': '1',
                    },
                    labels={
                        'airflow-worker': '5',
                        'airflow_version': mock.ANY,
                        'dag_id': 'dag',
                        'execution_date': datetime_to_label_safe_datestring(execution_date),
                        'kubernetes_executor': 'True',
                        'mylabel': 'foo',
                        'release': 'stable',
                        'task_id': 'task',
                        'try_number': '1',
                    },
                ),
                spec=k8s.V1PodSpec(
                    containers=[
                        k8s.V1Container(
                            name="base",
                            image="airflow:3.6",
                            args=['airflow', 'tasks', 'run', 'true', 'some_parameter'],
                            env=[k8s.V1EnvVar(name='AIRFLOW_IS_K8S_EXECUTOR_POD', value='True')],
                        )
                    ],
                    image_pull_secrets=[k8s.V1LocalObjectReference(name='airflow-registry')],
                    scheduler_name='default-scheduler',
                    security_context=k8s.V1PodSecurityContext(fs_group=50000, run_as_user=50000),
                ),
            )
        )
def __init__(
    self,
    image=None,
    name=None,
    namespace=None,
    volume_mounts=None,
    envs=None,
    cmds=None,
    args=None,
    labels=None,
    node_selectors=None,
    ports=None,
    volumes=None,
    image_pull_policy=None,
    restart_policy=None,
    image_pull_secrets=None,
    init_containers=None,
    service_account_name=None,
    resources=None,
    annotations=None,
    affinity=None,
    hostnetwork=False,
    tolerations=None,
    security_context=None,
    configmaps=None,
    dnspolicy=None,
    schedulername=None,
    priority_class_name=None,
    pod=None,
    pod_template_file=None,
    extract_xcom=False,
):
    if pod_template_file:
        self.ud_pod = self.deserialize_model_file(pod_template_file)
    else:
        self.ud_pod = pod

    self.pod = k8s.V1Pod()
    self.pod.api_version = 'v1'
    self.pod.kind = 'Pod'

    # Pod Metadata
    self.metadata = k8s.V1ObjectMeta()
    self.metadata.labels = labels
    self.metadata.name = name
    self.metadata.namespace = namespace
    self.metadata.annotations = annotations

    # Pod Container
    self.container = k8s.V1Container(name='base')
    self.container.image = image
    self.container.env = []

    if envs:
        if isinstance(envs, dict):
            for key, val in envs.items():
                self.container.env.append(k8s.V1EnvVar(name=key, value=val))
        elif isinstance(envs, list):
            self.container.env.extend(envs)

    configmaps = configmaps or []
    self.container.env_from = []
    for configmap in configmaps:
        self.container.env_from.append(
            k8s.V1EnvFromSource(config_map_ref=k8s.V1ConfigMapEnvSource(name=configmap))
        )

    self.container.command = cmds or []
    self.container.args = args or []
    self.container.image_pull_policy = image_pull_policy
    self.container.ports = ports or []
    self.container.resources = resources
    self.container.volume_mounts = [
        v.to_k8s_client_obj() for v in _extract_volume_mounts(volume_mounts)
    ]

    # Pod Spec
    self.spec = k8s.V1PodSpec(containers=[])
    self.spec.security_context = security_context
    self.spec.tolerations = tolerations
    self.spec.dns_policy = dnspolicy
    self.spec.scheduler_name = schedulername
    self.spec.host_network = hostnetwork
    self.spec.affinity = affinity
    self.spec.service_account_name = service_account_name
    self.spec.init_containers = init_containers
    self.spec.volumes = volumes or []
    self.spec.node_selector = node_selectors
    self.spec.restart_policy = restart_policy
    self.spec.priority_class_name = priority_class_name

    self.spec.image_pull_secrets = []
    if image_pull_secrets:
        for image_pull_secret in image_pull_secrets.split(','):
            self.spec.image_pull_secrets.append(
                k8s.V1LocalObjectReference(name=image_pull_secret)
            )

    # Attach sidecar
    self.extract_xcom = extract_xcom
def construct_pod(
    dag_id: str,
    task_id: str,
    pod_id: str,
    try_number: int,
    kube_image: str,
    date: Optional[datetime.datetime],
    args: List[str],
    pod_override_object: Optional[k8s.V1Pod],
    base_worker_pod: k8s.V1Pod,
    namespace: str,
    scheduler_job_id: int,
    run_id: Optional[str] = None,
) -> k8s.V1Pod:
    """
    Construct a pod by gathering and consolidating the configuration from 3 places:
        - airflow.cfg
        - executor_config
        - dynamic arguments
    """
    try:
        image = pod_override_object.spec.containers[0].image  # type: ignore
        if not image:
            image = kube_image
    except Exception:
        image = kube_image

    annotations = {
        'dag_id': dag_id,
        'task_id': task_id,
        'try_number': str(try_number),
    }
    labels = {
        'airflow-worker': make_safe_label_value(str(scheduler_job_id)),
        'dag_id': make_safe_label_value(dag_id),
        'task_id': make_safe_label_value(task_id),
        'try_number': str(try_number),
        'airflow_version': airflow_version.replace('+', '-'),
        'kubernetes_executor': 'True',
    }
    if date:
        annotations['execution_date'] = date.isoformat()
        labels['execution_date'] = datetime_to_label_safe_datestring(date)
    if run_id:
        annotations['run_id'] = run_id
        labels['run_id'] = make_safe_label_value(run_id)

    dynamic_pod = k8s.V1Pod(
        metadata=k8s.V1ObjectMeta(
            namespace=namespace,
            annotations=annotations,
            name=PodGenerator.make_unique_pod_id(pod_id),
            labels=labels,
        ),
        spec=k8s.V1PodSpec(
            containers=[
                k8s.V1Container(
                    name="base",
                    args=args,
                    image=image,
                    env=[k8s.V1EnvVar(name="AIRFLOW_IS_K8S_EXECUTOR_POD", value="True")],
                )
            ]
        ),
    )

    # Reconcile the pods starting with the first chronologically:
    # pod from the pod_template_file -> pod from the executor_config arg -> pod from the K8s executor
    pod_list = [base_worker_pod, pod_override_object, dynamic_pod]

    return reduce(PodGenerator.reconcile_pods, pod_list)
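# Hedged sketch of the precedence construct_pod relies on: reduce() merges the pod
# list left to right with PodGenerator.reconcile_pods, so later pods win. Assumes
# PodGenerator is importable (e.g. from airflow.kubernetes.pod_generator).
from functools import reduce

base = k8s.V1Pod(spec=k8s.V1PodSpec(containers=[k8s.V1Container(name='base', image='template-image')]))
override = k8s.V1Pod(spec=k8s.V1PodSpec(containers=[k8s.V1Container(name='base', image='override-image')]))
merged = reduce(PodGenerator.reconcile_pods, [base, override])
# merged.spec.containers[0].image == 'override-image'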
def _get_init_containers(self) -> List[k8s.V1Container]:
    """When using git to retrieve the DAGs, use the GitSync Init Container"""
    # If we're using volume claims to mount the dags, no init container is needed
    if self.kube_config.dags_volume_claim or \
            self.kube_config.dags_volume_host or self.kube_config.dags_in_image:
        return []

    # Otherwise, define a git-sync init container
    init_environment = [
        k8s.V1EnvVar(name='GIT_SYNC_REPO', value=self.kube_config.git_repo),
        k8s.V1EnvVar(name='GIT_SYNC_BRANCH', value=self.kube_config.git_branch),
        k8s.V1EnvVar(name='GIT_SYNC_ROOT', value=self.kube_config.git_sync_root),
        k8s.V1EnvVar(name='GIT_SYNC_DEST', value=self.kube_config.git_sync_dest),
        k8s.V1EnvVar(name='GIT_SYNC_REV', value=self.kube_config.git_sync_rev),
        k8s.V1EnvVar(name='GIT_SYNC_DEPTH', value='1'),
        k8s.V1EnvVar(name='GIT_SYNC_ONE_TIME', value='true'),
    ]
    if self.kube_config.git_user:
        init_environment.append(
            k8s.V1EnvVar(name='GIT_SYNC_USERNAME', value=self.kube_config.git_user)
        )
    if self.kube_config.git_password:
        init_environment.append(
            k8s.V1EnvVar(name='GIT_SYNC_PASSWORD', value=self.kube_config.git_password)
        )

    volume_mounts = [
        k8s.V1VolumeMount(
            mount_path=self.kube_config.git_sync_root,
            name=self.dags_volume_name,
            read_only=False,
        )
    ]

    if self.kube_config.git_sync_credentials_secret:
        init_environment.extend([
            k8s.V1EnvVar(
                name='GIT_SYNC_USERNAME',
                value_from=k8s.V1EnvVarSource(
                    secret_key_ref=k8s.V1SecretKeySelector(
                        name=self.kube_config.git_sync_credentials_secret,
                        key='GIT_SYNC_USERNAME',
                    )
                ),
            ),
            k8s.V1EnvVar(
                name='GIT_SYNC_PASSWORD',
                value_from=k8s.V1EnvVarSource(
                    secret_key_ref=k8s.V1SecretKeySelector(
                        name=self.kube_config.git_sync_credentials_secret,
                        key='GIT_SYNC_PASSWORD',
                    )
                ),
            ),
        ])

    if self.kube_config.git_ssh_key_secret_name:
        volume_mounts.append(
            k8s.V1VolumeMount(
                name=self.git_sync_ssh_secret_volume_name,
                mount_path='/etc/git-secret/ssh',
                sub_path='ssh',
            )
        )
        init_environment.extend([
            k8s.V1EnvVar(name='GIT_SSH_KEY_FILE', value='/etc/git-secret/ssh'),
            k8s.V1EnvVar(name='GIT_SYNC_SSH', value='true'),
        ])

    if self.kube_config.git_ssh_known_hosts_configmap_name:
        volume_mounts.append(
            k8s.V1VolumeMount(
                name=self.git_sync_ssh_known_hosts_volume_name,
                mount_path='/etc/git-secret/known_hosts',
                sub_path='known_hosts',
            )
        )
        init_environment.extend([
            k8s.V1EnvVar(name='GIT_KNOWN_HOSTS', value='true'),
            k8s.V1EnvVar(name='GIT_SSH_KNOWN_HOSTS_FILE', value='/etc/git-secret/known_hosts'),
        ])
    else:
        init_environment.append(k8s.V1EnvVar(name='GIT_KNOWN_HOSTS', value='false'))

    init_containers = k8s.V1Container(
        name=self.kube_config.git_sync_init_container_name,
        image=self.kube_config.git_sync_container,
        env=init_environment,
        volume_mounts=volume_mounts,
    )

    if self.kube_config.git_sync_run_as_user != "":
        init_containers.security_context = k8s.V1SecurityContext(
            run_as_user=self.kube_config.git_sync_run_as_user
        )  # git-sync user

    return [init_containers]
    k8s.V1EnvFromSource(config_map_ref=k8s.V1ConfigMapEnvSource(name='test-configmap-1')),
    k8s.V1EnvFromSource(config_map_ref=k8s.V1ConfigMapEnvSource(name='test-configmap-2')),
]

volume = k8s.V1Volume(
    name='test-volume',
    persistent_volume_claim=k8s.V1PersistentVolumeClaimVolumeSource(claim_name='test-volume'),
)

port = k8s.V1ContainerPort(name='http', container_port=80)

init_container_volume_mounts = [
    k8s.V1VolumeMount(mount_path='/etc/foo', name='test-volume', sub_path=None, read_only=True)
]

init_environments = [
    k8s.V1EnvVar(name='key1', value='value1'),
    k8s.V1EnvVar(name='key2', value='value2'),
]

init_container = k8s.V1Container(
    name="init-container",
    image="ubuntu:16.04",
    env=init_environments,
    volume_mounts=init_container_volume_mounts,
    command=["bash", "-cx"],
    args=["echo 10"],
)

affinity = k8s.V1Affinity(
    node_affinity=k8s.V1NodeAffinity(
        preferred_during_scheduling_ignored_during_execution=[
            k8s.V1PreferredSchedulingTerm(
                weight=1,
def __init__(
    self,
    image: Optional[str] = None,
    name: Optional[str] = None,
    namespace: Optional[str] = None,
    volume_mounts: Optional[List[Union[k8s.V1VolumeMount, dict]]] = None,
    envs: Optional[Dict[str, str]] = None,
    cmds: Optional[List[str]] = None,
    args: Optional[List[str]] = None,
    labels: Optional[Dict[str, str]] = None,
    node_selectors: Optional[Dict[str, str]] = None,
    ports: Optional[List[Union[k8s.V1ContainerPort, dict]]] = None,
    volumes: Optional[List[Union[k8s.V1Volume, dict]]] = None,
    image_pull_policy: Optional[str] = None,
    restart_policy: Optional[str] = None,
    image_pull_secrets: Optional[str] = None,
    init_containers: Optional[List[k8s.V1Container]] = None,
    service_account_name: Optional[str] = None,
    resources: Optional[Union[k8s.V1ResourceRequirements, dict]] = None,
    annotations: Optional[Dict[str, str]] = None,
    affinity: Optional[dict] = None,
    hostnetwork: bool = False,
    tolerations: Optional[list] = None,
    security_context: Optional[Union[k8s.V1PodSecurityContext, dict]] = None,
    configmaps: Optional[List[str]] = None,
    dnspolicy: Optional[str] = None,
    schedulername: Optional[str] = None,
    extract_xcom: bool = False,
    priority_class_name: Optional[str] = None,
):
    self.pod = k8s.V1Pod()
    self.pod.api_version = 'v1'
    self.pod.kind = 'Pod'

    # Pod Metadata
    self.metadata = k8s.V1ObjectMeta()
    self.metadata.labels = labels
    self.metadata.name = name
    self.metadata.namespace = namespace
    self.metadata.annotations = annotations

    # Pod Container
    self.container = k8s.V1Container(name='base')
    self.container.image = image
    self.container.env = []

    if envs:
        if isinstance(envs, dict):
            for key, val in envs.items():
                self.container.env.append(k8s.V1EnvVar(name=key, value=val))
        elif isinstance(envs, list):
            self.container.env.extend(envs)

    configmaps = configmaps or []
    self.container.env_from = []
    for configmap in configmaps:
        self.container.env_from.append(
            k8s.V1EnvFromSource(config_map_ref=k8s.V1ConfigMapEnvSource(name=configmap))
        )

    self.container.command = cmds or []
    self.container.args = args or []
    if image_pull_policy:
        self.container.image_pull_policy = image_pull_policy
    self.container.ports = ports or []
    self.container.resources = resources
    self.container.volume_mounts = volume_mounts or []

    # Pod Spec
    self.spec = k8s.V1PodSpec(containers=[])
    self.spec.security_context = security_context
    self.spec.tolerations = tolerations
    if dnspolicy:
        self.spec.dns_policy = dnspolicy
    self.spec.scheduler_name = schedulername
    self.spec.host_network = hostnetwork
    self.spec.affinity = affinity
    self.spec.service_account_name = service_account_name
    self.spec.init_containers = init_containers
    self.spec.volumes = volumes or []
    self.spec.node_selector = node_selectors
    if restart_policy:
        self.spec.restart_policy = restart_policy
    self.spec.priority_class_name = priority_class_name

    self.spec.image_pull_secrets = []
    if image_pull_secrets:
        for image_pull_secret in image_pull_secrets.split(','):
            self.spec.image_pull_secrets.append(
                k8s.V1LocalObjectReference(name=image_pull_secret)
            )

    # Attach sidecar
    self.extract_xcom = extract_xcom
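# Hedged usage sketch for the __init__ above (the class name PodGenerator is an
# assumption; the snippet shows only the constructor). A dict passed as `envs`
# is turned into V1EnvVar entries on the 'base' container.
gen = PodGenerator(
    image='apache/airflow:latest',
    name='example-pod',
    namespace='default',
    envs={'LOG_LEVEL': 'info'},
    cmds=['bash', '-c'],
    args=['echo hello'],
)
# gen.container.env == [k8s.V1EnvVar(name='LOG_LEVEL', value='info')]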
def to_k8s_client_obj(self) -> k8s.V1EnvVar:
    """:return: kubernetes.client.models.V1EnvVar"""
    return k8s.V1EnvVar(
        name=self.name,
        value_from=k8s.V1EnvVarSource(
            field_ref=k8s.V1ObjectFieldSelector(field_path=self.field_path)
        ),
    )
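# Usage sketch for to_k8s_client_obj above (hedged: assumes Airflow's
# PodRuntimeInfoEnv helper, which carries `name` and `field_path`). The resulting
# env var is populated via the Kubernetes downward API rather than a literal value.
runtime_env = PodRuntimeInfoEnv(name='POD_IP', field_path='status.podIP')
env_var = runtime_env.to_k8s_client_obj()
# env_var.value_from.field_ref.field_path == 'status.podIP'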
def pipeline_definition(
    hydrosphere_address="{hydrosphere-instance-address}",  # <-- Replace with correct instance address
    mount_path='/storage',
    learning_rate="0.01",
    learning_steps="10000",
    batch_size="256",
    warmpup_count="100",
    model_name="mnist",
    application_name="mnist-app",
    signature_name="predict",
    acceptable_accuracy="0.90",
    requests_delay="4",
    recurring_run="0",
):
    storage_pvc = k8s.V1PersistentVolumeClaimVolumeSource(claim_name="storage")
    storage_volume = k8s.V1Volume(name="storage", persistent_volume_claim=storage_pvc)
    storage_volume_mount = k8s.V1VolumeMount(
        mount_path="{{workflow.parameters.mount-path}}", name="storage")

    hydrosphere_address_env = k8s.V1EnvVar(
        name="CLUSTER_ADDRESS", value="{{workflow.parameters.hydrosphere-address}}")
    mount_path_env = k8s.V1EnvVar(
        name="MOUNT_PATH", value="{{workflow.parameters.mount-path}}")
    model_name_env = k8s.V1EnvVar(
        name="MODEL_NAME", value="{{workflow.parameters.model-name}}")
    application_name_env = k8s.V1EnvVar(
        name="APPLICATION_NAME", value="{{workflow.parameters.application-name}}")
    signature_name_env = k8s.V1EnvVar(
        name="SIGNATURE_NAME", value="{{workflow.parameters.signature-name}}")
    acceptable_accuracy_env = k8s.V1EnvVar(
        name="ACCEPTABLE_ACCURACY", value="{{workflow.parameters.acceptable-accuracy}}")
    learning_rate_env = k8s.V1EnvVar(
        name="LEARNING_RATE", value="{{workflow.parameters.learning-rate}}")
    learning_steps_env = k8s.V1EnvVar(
        name="LEARNING_STEPS", value="{{workflow.parameters.learning-steps}}")
    batch_size_env = k8s.V1EnvVar(
        name="BATCH_SIZE", value="{{workflow.parameters.batch-size}}")
    warmup_count_env = k8s.V1EnvVar(
        name="WARMUP_IMAGES_AMOUNT", value="{{workflow.parameters.warmpup-count}}")
    requests_delay_env = k8s.V1EnvVar(
        name="REQUESTS_DELAY", value="{{workflow.parameters.requests-delay}}")
    recurring_run_env = k8s.V1EnvVar(
        name="RECURRING_RUN", value="{{workflow.parameters.recurring-run}}")

    # 1. Make a sample of production data for retraining
    sample = dsl.ContainerOp(
        name="sample",
        image="tidylobster/mnist-pipeline-sampling:latest")  # <-- Replace with correct docker image
    sample.add_volume(storage_volume)
    sample.add_volume_mount(storage_volume_mount)
    sample.add_env_variable(mount_path_env)
    sample.add_env_variable(hydrosphere_address_env)
    sample.add_env_variable(application_name_env)

    # 2. Train and save a MNIST classifier using Tensorflow
    train = dsl.ContainerOp(
        name="train",
        image="tidylobster/mnist-pipeline-train:latest",  # <-- Replace with correct docker image
        file_outputs={"accuracy": "/accuracy.txt"})
    train.after(sample)
    train.set_memory_request('2G')
    train.set_cpu_request('1')
    train.add_volume(storage_volume)
    train.add_volume_mount(storage_volume_mount)
    train.add_env_variable(mount_path_env)
    train.add_env_variable(learning_rate_env)
    train.add_env_variable(learning_steps_env)
    train.add_env_variable(batch_size_env)
    train.add_env_variable(recurring_run_env)

    # 3. Upload trained model to the cluster
    upload = dsl.ContainerOp(
        name="upload",
        image="tidylobster/mnist-pipeline-upload:latest",  # <-- Replace with correct docker image
        file_outputs={"model-version": "/model-version.txt"},
        arguments=[train.outputs["accuracy"]])
    upload.after(train)
    upload.add_volume(storage_volume)
    upload.add_volume_mount(storage_volume_mount)
    upload.add_env_variable(mount_path_env)
    upload.add_env_variable(model_name_env)
    upload.add_env_variable(hydrosphere_address_env)
    upload.add_env_variable(learning_rate_env)
    upload.add_env_variable(learning_steps_env)
    upload.add_env_variable(batch_size_env)

    # 4. Pre-deploy application
    predeploy = dsl.ContainerOp(
        name="predeploy",
        image="tidylobster/mnist-pipeline-predeploy:latest",  # <-- Replace with correct docker image
        arguments=[upload.outputs["model-version"]],
        file_outputs={"predeploy-app-name": "/predeploy-app-name.txt"})
    predeploy.after(upload)
    predeploy.add_env_variable(hydrosphere_address_env)
    predeploy.add_env_variable(application_name_env)
    predeploy.add_env_variable(model_name_env)

    # 5. Test the model
    test = dsl.ContainerOp(
        name="test",
        image="tidylobster/mnist-pipeline-test:latest",  # <-- Replace with correct docker image
        arguments=[predeploy.outputs["predeploy-app-name"]])
    test.set_retry(3)
    test.after(predeploy)
    test.add_volume(storage_volume)
    test.add_volume_mount(storage_volume_mount)
    test.add_env_variable(mount_path_env)
    test.add_env_variable(hydrosphere_address_env)
    test.add_env_variable(application_name_env)
    test.add_env_variable(signature_name_env)
    test.add_env_variable(warmup_count_env)
    test.add_env_variable(acceptable_accuracy_env)
    test.add_env_variable(requests_delay_env)
    test.add_env_variable(recurring_run_env)

    # 6. Remove predeploy application
    rm_predeploy = dsl.ContainerOp(
        name="remove-predeploy",
        image="tidylobster/mnist-pipeline-rm-predeploy:latest",  # <-- Replace with correct docker image
        arguments=[predeploy.outputs["predeploy-app-name"]])
    rm_predeploy.after(test)
    rm_predeploy.add_env_variable(hydrosphere_address_env)

    # 7. Deploy application
    deploy = dsl.ContainerOp(
        name="deploy",
        image="tidylobster/mnist-pipeline-deploy:latest",  # <-- Replace with correct docker image
        arguments=[upload.outputs["model-version"]])
    deploy.after(test)
    deploy.add_env_variable(hydrosphere_address_env)
    deploy.add_env_variable(application_name_env)
    deploy.add_env_variable(model_name_env)
def get_kubeadmin_password(version, platform, profile):
    return k8s.V1EnvVar(
        name="KUBEADMIN_PASSWORD",
        value_from=k8s.V1EnvVarSource(
            secret_key_ref=k8s.V1SecretKeySelector(
                name=f"{version}-{platform}-{profile}-kubeadmin",
                key="KUBEADMIN_PASSWORD",
            )
        ),
    )
def to_v1_kubernetes_pod(self):
    """
    Convert to support k8s V1Pod

    :return: k8s.V1Pod
    """
    import kubernetes.client.models as k8s

    meta = k8s.V1ObjectMeta(
        labels=self.labels,
        name=self.name,
        namespace=self.namespace,
        annotations=self.annotations,
    )
    if self.image_pull_secrets:
        image_pull_secrets = [
            k8s.V1LocalObjectReference(i) for i in self.image_pull_secrets.split(",")
        ]
    else:
        image_pull_secrets = []
    spec = k8s.V1PodSpec(
        init_containers=self.init_containers,
        containers=[
            k8s.V1Container(
                image=self.image,
                command=self.cmds,
                env_from=[],
                name="base",
                env=[k8s.V1EnvVar(name=key, value=val) for key, val in self.envs.items()],
                args=self.args,
                image_pull_policy=self.image_pull_policy,
            )
        ],
        image_pull_secrets=image_pull_secrets,
        service_account_name=self.service_account_name,
        node_selector=self.node_selectors,
        dns_policy=self.dnspolicy,
        host_network=self.hostnetwork,
        tolerations=self.tolerations,
        affinity=self.affinity,
        security_context=self.security_context,
    )

    pod = k8s.V1Pod(spec=spec, metadata=meta)

    for configmap_name in self.configmaps:
        env_var = k8s.V1EnvFromSource(
            config_map_ref=k8s.V1ConfigMapEnvSource(name=configmap_name)
        )
        pod.spec.containers[0].env_from.append(env_var)

    for port in _extract_ports(self.ports):
        pod = port.attach_to_pod(pod)
    volumes = _extract_volumes(self.volumes)
    for volume in volumes:
        pod = volume.attach_to_pod(pod)
    for volume_mount in _extract_volume_mounts(self.volume_mounts):
        pod = volume_mount.attach_to_pod(pod)
    for secret in self.secrets:
        pod = secret.attach_to_pod(pod)
    for runtime_info in self.pod_runtime_info_envs:
        pod = runtime_info.attach_to_pod(pod)
    pod = _extract_resources(self.resources).attach_to_pod(pod)
    return pod
        schedule_interval=None,
        max_active_tasks=10,
        max_active_runs=10) as dag:

    load_resources = V1ResourceRequirements(
        requests={"memory": "18Gi"}, limits={"memory": "18Gi"})
    node_selector = {"loader-node": "true"}
    image_pull_secrets = [k8s.V1LocalObjectReference('falkonry-pull-secret')]

    envs = list()
    load_file = "{{ dag_run.conf['load_file'] }}"
    compact_file = "{{ dag_run.conf['compact_file'] }}"
    task_id = "{{ dag_run.conf['task_id'] }}"

    envs.append(
        k8s.V1EnvVar(
            name="AWS_SECRET_ACCESS_KEY",
            value_from=k8s.V1EnvVarSource(
                secret_key_ref=k8s.V1SecretKeySelector(
                    key="secret-id", name="aws-key-prod"))))
    envs.append(
        k8s.V1EnvVar(
            name="AWS_DEFAULT_REGION",
            value_from=k8s.V1EnvVarSource(
                secret_key_ref=k8s.V1SecretKeySelector(
                    key="region-w2", name="aws-key-prod"))))
    envs.append(
        k8s.V1EnvVar(
            name="AWS_REGION",
            value_from=k8s.V1EnvVarSource(
                secret_key_ref=k8s.V1SecretKeySelector(
                    key="region-w2", name="aws-key-prod"))))
    envs.append(
        k8s.V1EnvVar(
            name="AWS_ACCESS_KEY_ID",
            value_from=k8s.V1EnvVarSource(
                secret_key_ref=k8s.V1SecretKeySelector(
configmaps = ['test-configmap-1', 'test-configmap-2']

volume_config = {'persistentVolumeClaim': {'claimName': 'test-volume'}}
volume = Volume(name='test-volume', configs=volume_config)
# [END howto_operator_k8s_cluster_resources]

port = Port('http', 80)

init_container_volume_mounts = [
    k8s.V1VolumeMount(mount_path='/etc/foo', name='test-volume', sub_path=None, read_only=True)
]

init_environments = [
    k8s.V1EnvVar(name='key1', value='value1'),
    k8s.V1EnvVar(name='key2', value='value2'),
]

init_container = k8s.V1Container(
    name="init-container",
    image="ubuntu:16.04",
    env=init_environments,
    volume_mounts=init_container_volume_mounts,
    command=["bash", "-cx"],
    args=["echo 10"],
)

affinity = {
    'nodeAffinity': {
        'preferredDuringSchedulingIgnoredDuringExecution': [{
    'start_date': datetime.utcnow()
}

with DAG('load_optimized',
         default_args=default_args,
         schedule_interval=None,
         max_active_tasks=20,
         max_active_runs=20) as dag:

    load_resources = V1ResourceRequirements(
        requests={"memory": "18Gi"}, limits={"memory": "18Gi"})
    node_selector = {"loader-node": "true"}
    image_pull_secrets = [k8s.V1LocalObjectReference('falkonry-pull-secret')]

    envs = list()
    load_file = "{{ dag_run.conf['load_file'] }}"
    compact_file = "{{ dag_run.conf['compact_file'] }}"
    task_id = "{{ dag_run.conf['task_id'] }}"

    envs.append(
        k8s.V1EnvVar(
            name="AWS_SECRET_ACCESS_KEY",
            value_from=k8s.V1EnvVarSource(
                secret_key_ref=k8s.V1SecretKeySelector(
                    key="secret-id", name="aws-key-prod"))))
    envs.append(
        k8s.V1EnvVar(
            name="AWS_DEFAULT_REGION",
            value_from=k8s.V1EnvVarSource(
                secret_key_ref=k8s.V1SecretKeySelector(
                    key="region-w2", name="aws-key-prod"))))
    envs.append(
        k8s.V1EnvVar(
            name="AWS_REGION",
            value_from=k8s.V1EnvVarSource(
                secret_key_ref=k8s.V1SecretKeySelector(
                    key="region-w2", name="aws-key-prod"))))
    envs.append(
        k8s.V1EnvVar(
            name="AWS_ACCESS_KEY_ID",
            value_from=k8s.V1EnvVarSource(
                secret_key_ref=k8s.V1SecretKeySelector(
                    key="key-id", name="aws-key-prod"))
def pipeline_definition(
    hydrosphere_name="local",
    hydrosphere_address="http://hydro-serving-sidecar-serving.kubeflow.svc.cluster.local:8080",
    data_directory='/data/mnist',
    models_directory="/models/mnist",
    learning_rate="0.01",
    learning_steps="5000",
    batch_size="256",
    warmpup_count="100",
    model_name="mnist",
    application_name="mnist-app",
    signature_name="predict",
    acceptable_accuracy="0.90",
):
    data_pvc = k8s.V1PersistentVolumeClaimVolumeSource(claim_name="data")
    models_pvc = k8s.V1PersistentVolumeClaimVolumeSource(claim_name="models")
    data_volume = k8s.V1Volume(name="data", persistent_volume_claim=data_pvc)
    models_volume = k8s.V1Volume(name="models", persistent_volume_claim=models_pvc)
    data_volume_mount = k8s.V1VolumeMount(
        mount_path="{{workflow.parameters.data-directory}}", name="data")
    models_volume_mount = k8s.V1VolumeMount(
        mount_path="{{workflow.parameters.models-directory}}", name="models")

    hydrosphere_address_env = k8s.V1EnvVar(
        name="CLUSTER_ADDRESS", value="{{workflow.parameters.hydrosphere-address}}")
    hydrosphere_name_env = k8s.V1EnvVar(
        name="CLUSTER_NAME", value="{{workflow.parameters.hydrosphere-name}}")
    data_directory_env = k8s.V1EnvVar(
        name="MNIST_DATA_DIR", value="{{workflow.parameters.data-directory}}")
    models_directory_env = k8s.V1EnvVar(
        name="MNIST_MODELS_DIR", value="{{workflow.parameters.models-directory}}")
    model_name_env = k8s.V1EnvVar(
        name="MODEL_NAME", value="{{workflow.parameters.model-name}}")
    application_name_env = k8s.V1EnvVar(
        name="APPLICATION_NAME", value="{{workflow.parameters.application-name}}")
    signature_name_env = k8s.V1EnvVar(
        name="SIGNATURE_NAME", value="{{workflow.parameters.signature-name}}")
    acceptable_accuracy_env = k8s.V1EnvVar(
        name="ACCEPTABLE_ACCURACY", value="{{workflow.parameters.acceptable-accuracy}}")
    learning_rate_env = k8s.V1EnvVar(
        name="LEARNING_RATE", value="{{workflow.parameters.learning-rate}}")
    learning_steps_env = k8s.V1EnvVar(
        name="LEARNING_STEPS", value="{{workflow.parameters.learning-steps}}")
    batch_size_env = k8s.V1EnvVar(
        name="BATCH_SIZE", value="{{workflow.parameters.batch-size}}")
    warmup_count_env = k8s.V1EnvVar(
        name="WARMUP_IMAGES_AMOUNT", value="{{workflow.parameters.warmpup-count}}")

    # 1. Download MNIST data
    download = dsl.ContainerOp(
        name="download", image="tidylobster/mnist-pipeline-download:latest")
    download.add_volume(data_volume)
    download.add_volume_mount(data_volume_mount)
    download.add_env_variable(data_directory_env)

    # 2. Train and save a MNIST classifier using Tensorflow
    train = dsl.ContainerOp(name="train", image="tidylobster/mnist-pipeline-train:latest")
    train.after(download)
    train.set_memory_request('2G')
    train.set_cpu_request('1')
    train.add_volume(data_volume)
    train.add_volume(models_volume)
    train.add_volume_mount(data_volume_mount)
    train.add_volume_mount(models_volume_mount)
    train.add_env_variable(data_directory_env)
    train.add_env_variable(models_directory_env)
    train.add_env_variable(learning_rate_env)
    train.add_env_variable(learning_steps_env)
    train.add_env_variable(batch_size_env)

    # 3. Upload trained model to the cluster
    upload = dsl.ContainerOp(
        name="upload",
        image="tidylobster/mnist-pipeline-upload:latest",
        file_outputs={"model_version": "/model_version.txt"})
    upload.after(train)
    upload.add_volume(models_volume)
    upload.add_volume_mount(models_volume_mount)
    upload.add_env_variable(models_directory_env)
    upload.add_env_variable(model_name_env)
    upload.add_env_variable(hydrosphere_name_env)
    upload.add_env_variable(hydrosphere_address_env)

    # 4. Deploy application
    deploy = dsl.ContainerOp(
        name="deploy",
        image="tidylobster/mnist-pipeline-deploy:latest",
        arguments=[upload.outputs["model_version"]])
    deploy.after(upload)
    deploy.add_env_variable(hydrosphere_name_env)
    deploy.add_env_variable(hydrosphere_address_env)
    deploy.add_env_variable(application_name_env)
    deploy.add_env_variable(model_name_env)

    # 5. Test the model
    test = dsl.ContainerOp(name="test", image="tidylobster/mnist-pipeline-test:latest")
    test.after(deploy)
    test.add_volume(data_volume)
    test.add_volume_mount(data_volume_mount)
    test.add_env_variable(data_directory_env)
    test.add_env_variable(hydrosphere_address_env)
    test.add_env_variable(application_name_env)
    test.add_env_variable(signature_name_env)
    test.add_env_variable(warmup_count_env)
    test.add_env_variable(acceptable_accuracy_env)

    # 6. Clean environment
    clean = dsl.ContainerOp(name="clean", image="tidylobster/mnist-pipeline-clean:latest")
    clean.after(test)
    clean.add_volume(data_volume)
    clean.add_volume_mount(data_volume_mount)
    clean.add_env_variable(data_directory_env)
    clean.add_volume(models_volume)
    clean.add_volume_mount(models_volume_mount)
    clean.add_env_variable(models_directory_env)
#config.load_kube_config(config_file=options.kubeconfig)
client.configuration.api_key['authorization'] = options.apikey
client.configuration.host = options.host
v1 = client.CoreV1Api()
v1Batch = client.BatchV1Api()

# define data structure
job = models.V1Job()
job_meta = models.V1ObjectMeta()
job_spec = models.V1JobSpec()
pod_spec_template = models.V1PodTemplateSpec()
pod_spec = models.V1PodSpec()
pod_meta = models.V1ObjectMeta()
pod_spec_container = models.V1Container()
env_var = models.V1EnvVar()

# populate data structure
# parent object
job.api_version = "batch/v1"
job.kind = "Job"

# job metadata
job_meta.name = "p1"
job.metadata = job_meta

# container spec, e.g. 172.30.129.159:5000/testjob/worker
pod_spec_container.name = "c1"
pod_spec_container.image = options.image

# job spec
def __init__(  # pylint: disable=too-many-arguments,too-many-locals
    self,
    image,
    name=None,
    namespace=None,
    volume_mounts=None,
    envs=None,
    cmds=None,
    args=None,
    labels=None,
    node_selectors=None,
    ports=None,
    volumes=None,
    image_pull_policy='IfNotPresent',
    restart_policy='Never',
    image_pull_secrets=None,
    init_containers=None,
    service_account_name=None,
    resources=None,
    annotations=None,
    affinity=None,
    hostnetwork=False,
    tolerations=None,
    security_context=None,
    configmaps=None,
    dnspolicy=None,
    pod=None,
    extract_xcom=False,
):
    self.ud_pod = pod
    self.pod = k8s.V1Pod()
    self.pod.api_version = 'v1'
    self.pod.kind = 'Pod'

    # Pod Metadata
    self.metadata = k8s.V1ObjectMeta()
    self.metadata.labels = labels
    self.metadata.name = name + "-" + str(uuid.uuid4())[:8] if name else None
    self.metadata.namespace = namespace
    self.metadata.annotations = annotations

    # Pod Container
    self.container = k8s.V1Container(name='base')
    self.container.image = image
    self.container.env = []

    if envs:
        if isinstance(envs, dict):
            for key, val in envs.items():
                self.container.env.append(k8s.V1EnvVar(name=key, value=val))
        elif isinstance(envs, list):
            self.container.env.extend(envs)

    configmaps = configmaps or []
    self.container.env_from = []
    for configmap in configmaps:
        self.container.env_from.append(
            k8s.V1EnvFromSource(config_map_ref=k8s.V1ConfigMapEnvSource(name=configmap))
        )

    self.container.command = cmds or []
    self.container.args = args or []
    self.container.image_pull_policy = image_pull_policy
    self.container.ports = ports or []
    self.container.resources = resources
    self.container.volume_mounts = volume_mounts or []

    # Pod Spec
    self.spec = k8s.V1PodSpec(containers=[])
    self.spec.security_context = security_context
    self.spec.tolerations = tolerations
    self.spec.dns_policy = dnspolicy
    self.spec.host_network = hostnetwork
    self.spec.affinity = affinity
    self.spec.service_account_name = service_account_name
    self.spec.init_containers = init_containers
    self.spec.volumes = volumes or []
    self.spec.node_selector = node_selectors
    self.spec.restart_policy = restart_policy

    self.spec.image_pull_secrets = []
    if image_pull_secrets:
        for image_pull_secret in image_pull_secrets.split(','):
            self.spec.image_pull_secrets.append(
                k8s.V1LocalObjectReference(name=image_pull_secret)
            )

    # Attach sidecar
    self.extract_xcom = extract_xcom
def test_init_container(self):
    # GIVEN
    volume_mounts = [
        k8s.V1VolumeMount(mount_path='/etc/foo', name='test-volume', sub_path=None, read_only=True)
    ]
    init_environments = [
        k8s.V1EnvVar(name='key1', value='value1'),
        k8s.V1EnvVar(name='key2', value='value2'),
    ]
    init_container = k8s.V1Container(
        name="init-container",
        image="ubuntu:16.04",
        env=init_environments,
        volume_mounts=volume_mounts,
        command=["bash", "-cx"],
        args=["echo 10"],
    )
    volume_config = {'persistentVolumeClaim': {'claimName': 'test-volume'}}
    volume = Volume(name='test-volume', configs=volume_config)
    expected_init_container = {
        'name': 'init-container',
        'image': 'ubuntu:16.04',
        'command': ['bash', '-cx'],
        'args': ['echo 10'],
        'env': [
            {'name': 'key1', 'value': 'value1'},
            {'name': 'key2', 'value': 'value2'},
        ],
        'volumeMounts': [
            {'mountPath': '/etc/foo', 'name': 'test-volume', 'readOnly': True}
        ],
    }

    k = KubernetesPodOperator(
        namespace='default',
        image="ubuntu:16.04",
        cmds=["bash", "-cx"],
        arguments=["echo 10"],
        labels={"foo": "bar"},
        name="test",
        task_id="task",
        volumes=[volume],
        init_containers=[init_container],
        in_cluster=False,
        do_xcom_push=False,
    )
    context = create_context(k)
    k.execute(context)
    actual_pod = self.api_client.sanitize_for_serialization(k.pod)
    self.expected_pod['spec']['initContainers'] = [expected_init_container]
    self.expected_pod['spec']['volumes'] = [
        {'name': 'test-volume', 'persistentVolumeClaim': {'claimName': 'test-volume'}}
    ]
    assert self.expected_pod == actual_pod
def test_to_env_secret(self):
    secret = Secret('env', 'name', 'secret', 'key')
    assert secret.to_env_secret() == k8s.V1EnvVar(
        name='NAME',
        value_from=k8s.V1EnvVarSource(
            secret_key_ref=k8s.V1SecretKeySelector(name='secret', key='key')
        ),
    )
def setUp(self):
    self.static_uuid = uuid.UUID('cf4a56d2-8101-4217-b027-2af6216feb48')
    self.deserialize_result = {
        'apiVersion': 'v1',
        'kind': 'Pod',
        'metadata': {'name': 'memory-demo', 'namespace': 'mem-example'},
        'spec': {
            'containers': [
                {
                    'args': ['--vm', '1', '--vm-bytes', '150M', '--vm-hang', '1'],
                    'command': ['stress'],
                    'image': 'apache/airflow:stress-2020.07.10-1.0.4',
                    'name': 'memory-demo-ctr',
                    'resources': {'limits': {'memory': '200Mi'}, 'requests': {'memory': '100Mi'}},
                }
            ]
        },
    }

    self.envs = {'ENVIRONMENT': 'prod', 'LOG_LEVEL': 'warning'}
    self.secrets = [
        # This should be a secretRef
        Secret('env', None, 'secret_a'),
        # This should be a single secret mounted in volumeMounts
        Secret('volume', '/etc/foo', 'secret_b'),
        # This should produce a single secret mounted in env
        Secret('env', 'TARGET', 'secret_b', 'source_b'),
    ]

    self.execution_date = parser.parse('2020-08-24 00:00:00.000000')
    self.execution_date_label = datetime_to_label_safe_datestring(self.execution_date)
    self.dag_id = 'dag_id'
    self.task_id = 'task_id'
    self.try_number = 3
    self.labels = {
        'airflow-worker': 'uuid',
        'dag_id': self.dag_id,
        'execution_date': self.execution_date_label,
        'task_id': self.task_id,
        'try_number': str(self.try_number),
        'airflow_version': __version__.replace('+', '-'),
        'kubernetes_executor': 'True',
    }
    self.annotations = {
        'dag_id': self.dag_id,
        'task_id': self.task_id,
        'execution_date': self.execution_date.isoformat(),
        'try_number': str(self.try_number),
    }
    self.metadata = {
        'labels': self.labels,
        'name': 'pod_id-' + self.static_uuid.hex,
        'namespace': 'namespace',
        'annotations': self.annotations,
    }

    self.resources = k8s.V1ResourceRequirements(
        requests={
            "cpu": 1,
            "memory": "1Gi",
            "ephemeral-storage": "2Gi",
        },
        limits={"cpu": 2, "memory": "2Gi", "ephemeral-storage": "4Gi", 'nvidia.com/gpu': 1},
    )

    self.k8s_client = ApiClient()
    self.expected = k8s.V1Pod(
        api_version="v1",
        kind="Pod",
        metadata=k8s.V1ObjectMeta(
            namespace="default",
            name='myapp-pod-' + self.static_uuid.hex,
            labels={'app': 'myapp'},
        ),
        spec=k8s.V1PodSpec(
            containers=[
                k8s.V1Container(
                    name='base',
                    image='busybox',
                    command=['sh', '-c', 'echo Hello Kubernetes!'],
                    env=[
                        k8s.V1EnvVar(name='ENVIRONMENT', value='prod'),
                        k8s.V1EnvVar(name='LOG_LEVEL', value='warning'),
                        k8s.V1EnvVar(
                            name='TARGET',
                            value_from=k8s.V1EnvVarSource(
                                secret_key_ref=k8s.V1SecretKeySelector(name='secret_b', key='source_b')
                            ),
                        ),
                    ],
                    env_from=[
                        k8s.V1EnvFromSource(config_map_ref=k8s.V1ConfigMapEnvSource(name='configmap_a')),
                        k8s.V1EnvFromSource(config_map_ref=k8s.V1ConfigMapEnvSource(name='configmap_b')),
                        k8s.V1EnvFromSource(secret_ref=k8s.V1SecretEnvSource(name='secret_a')),
                    ],
                    ports=[k8s.V1ContainerPort(name="foo", container_port=1234)],
                    resources=k8s.V1ResourceRequirements(
                        requests={'memory': '100Mi'},
                        limits={'memory': '200Mi'},
                    ),
                )
            ],
            security_context=k8s.V1PodSecurityContext(
                fs_group=2000,
                run_as_user=1000,
            ),
            host_network=True,
            image_pull_secrets=[
                k8s.V1LocalObjectReference(name="pull_secret_a"),
                k8s.V1LocalObjectReference(name="pull_secret_b"),
            ],
        ),
    )
name="test-volume", persistent_volume_claim=k8s.V1PersistentVolumeClaimVolumeSource( claim_name="test-volume"), ) port = k8s.V1ContainerPort(name="http", container_port=80) init_container_volume_mounts = [ k8s.V1VolumeMount(mount_path="/etc/foo", name="test-volume", sub_path=None, read_only=True) ] init_environments = [ k8s.V1EnvVar(name="key1", value="value1"), k8s.V1EnvVar(name="key2", value="value2"), ] init_container = k8s.V1Container( name="init-container", image="ubuntu:16.04", env=init_environments, volume_mounts=init_container_volume_mounts, command=["bash", "-cx"], args=["echo 10"], ) affinity = k8s.V1Affinity( node_affinity=k8s.V1NodeAffinity( preferred_during_scheduling_ignored_during_execution=[