def wait_for_pods_to_be_deleted(
        client,
        namespace,
        pod_selector,
        timeout=datetime.timedelta(minutes=5),
        polling_interval=datetime.timedelta(seconds=30)):
    """Wait until no pods match the given selector.

    Polls list_pods() until the returned pod list is empty.

    Args:
      client: K8s api client.
      namespace: Namespace.
      pod_selector: Selector for the pods.
      timeout: A datetime.timedelta; how long to wait for the pods to go
        away.
      polling_interval: A datetime.timedelta; how long to sleep between
        polls.

    Raises:
      util.TimeoutError: If pods still match the selector and sleeping for
        another polling interval would go past the deadline.
    """
    end_time = datetime.datetime.now() + timeout
    while True:
        pods = list_pods(client, namespace, pod_selector)

        logging.info("%s pods matched %s pods", len(pods.items), pod_selector)

        if not pods.items:
            return

        # Give up now if the next poll would happen after the deadline.
        if datetime.datetime.now() + polling_interval > end_time:
            raise util.TimeoutError("Timeout waiting for pods to be deleted.")

        # total_seconds() covers the whole interval; .seconds is only the
        # seconds component and is 0 for sub-second or whole-day intervals.
        time.sleep(polling_interval.total_seconds())
def wait_for_vm(project,
                zone,
                vm,
                timeout=datetime.timedelta(minutes=5),
                polling_interval=datetime.timedelta(seconds=10)):
    """Wait for the VM to be ready.

    This is measured by trying to ssh into the VM with
    `gcloud compute ssh` and running a trivial command.

    Args:
      project: Passed to gcloud as --project.
      zone: Passed to gcloud as --zone.
      vm: The VM to ssh into.
      timeout: A datetime.timedelta expressing the amount of time to wait
        before giving up.
      polling_interval: A datetime.timedelta to represent the amount of time
        to wait between ssh attempts.

    Raises:
      util.TimeoutError: If the deadline passes before an ssh attempt
        succeeds.
    """
    endtime = datetime.datetime.now() + timeout
    ssh_command = [
        "gcloud", "compute", "--project=" + project, "ssh",
        "--zone=" + zone, vm, "--", "echo hello world"
    ]
    while True:
        try:
            util.run(ssh_command)
            logging.info("VM is ready")
            return
        except subprocess.CalledProcessError:
            # A failed ssh attempt is treated as "not ready yet"; retry
            # until the deadline.
            pass

        if datetime.datetime.now() > endtime:
            raise util.TimeoutError((
                "Timed out waiting for VM {0} to be sshable. Check firewall "
                "rules aren't blocking ssh.").format(vm))

        time.sleep(polling_interval.total_seconds())
def wait_for_workflows(client,
                       namespace,
                       names,
                       timeout=datetime.timedelta(minutes=30),
                       polling_interval=datetime.timedelta(seconds=30),
                       status_callback=None):
    """Wait for multiple workflows to finish.

    A workflow counts as finished once its status phase is "Failed" or
    "Succeeded".

    Args:
      client: K8s api client.
      namespace: namespace for the workflow.
      names: Names of the workflows to wait for.
      timeout: A datetime.timedelta; how long to wait for the workflows.
      polling_interval: A datetime.timedelta; how long to sleep between
        polls.
      status_callback: (Optional): Callable. If supplied this callable is
        invoked once per workflow on every poll. Callable takes a single
        argument which is the workflow object returned by the API.

    Returns:
      results: A list of the final status of the workflows, in the same
        order as names.

    Raises:
      util.TimeoutError: If some workflow is still unfinished and sleeping
        for another polling interval would go past the deadline.
    """
    crd_api = k8s_client.CustomObjectsApi(client)
    end_time = datetime.datetime.now() + timeout
    finished_phases = ["Failed", "Succeeded"]
    while True:
        all_results = []

        for n in names:
            results = crd_api.get_namespaced_custom_object(
                GROUP, VERSION, namespace, PLURAL, n)
            all_results.append(results)
            if status_callback:
                status_callback(results)

        # Sometimes it takes a while for the argo controller to populate
        # the status field of an object, so a missing status/phase is
        # treated as "not finished yet".
        done = all(
            r.get("status", {}).get("phase", "") in finished_phases
            for r in all_results)

        if done:
            return all_results

        # Give up now if the next poll would happen after the deadline.
        if datetime.datetime.now() + polling_interval > end_time:
            raise util.TimeoutError(
                "Timeout waiting for workflows {0} in namespace {1} to finish."
                .format(",".join(names), namespace))

        # total_seconds() covers the whole interval; .seconds is only the
        # seconds component and is 0 for sub-second or whole-day intervals.
        time.sleep(polling_interval.total_seconds())

    # Unreachable: the loop above only exits via return or raise.
    # NOTE(review): presumably kept to satisfy a consistent-return lint
    # check -- confirm before removing.
    return []
def wait_for_pods_to_be_in_phases(
        client,
        namespace,
        pod_selector,
        phases,
        timeout=datetime.timedelta(minutes=15),
        polling_interval=datetime.timedelta(seconds=30)):
    """Wait for the pods matching the selector to be in the specified state.

    Succeeds only when at least one pod matches the selector and every
    matching pod's status.phase is in phases.

    Args:
      client: K8s api client.
      namespace: Namespace.
      pod_selector: Selector for the pods.
      phases: List of desired phases.
      timeout: A datetime.timedelta; how long to wait for the pods. The
        clock starts after the initial sleep.
      polling_interval: A datetime.timedelta; how long to sleep before the
        first poll and between polls.

    Returns:
      The pod list returned by list_pods() on the poll where all pods
      matched.

    Raises:
      util.TimeoutError: If the pods are not in the desired phases and
        sleeping for another polling interval would go past the deadline.
    """
    # Sleep once before the first poll.
    # NOTE(review): presumably gives the pods time to be created -- confirm.
    time.sleep(polling_interval.total_seconds())

    end_time = datetime.datetime.now() + timeout
    while True:
        pods = list_pods(client, namespace, pod_selector)

        logging.info("%s pods matched %s pods", len(pods.items), pod_selector)

        is_match = True
        for p in pods.items:
            if p.status.phase not in phases:
                # Log every pod that is not in a desired phase, for
                # debugging; deliberately no early exit.
                logging.info("pod in phase %s", p.status.phase)
                is_match = False

        # An empty pod list never counts as a match.
        if is_match and pods.items:
            logging.info("All pods in phase %s", phases)
            log_pods(pods)
            return pods

        # Give up now if the next poll would happen after the deadline.
        if datetime.datetime.now() + polling_interval > end_time:
            logging.info("Latest pod phases")
            log_pods(pods)
            logging.error("Timeout waiting for pods to be in phase: %s",
                          phases)
            raise util.TimeoutError(
                "Timeout waiting for pods to be in states %s" % phases)

        # total_seconds() covers the whole interval; .seconds is only the
        # seconds component and is 0 for sub-second or whole-day intervals.
        time.sleep(polling_interval.total_seconds())

    # Unreachable: the loop above only exits via return or raise.
    # NOTE(review): presumably kept to satisfy a consistent-return lint
    # check -- confirm before removing.
    return None
def wait_for_delete(client,
                    namespace,
                    name,
                    version="v1alpha1",
                    timeout=datetime.timedelta(minutes=5),
                    polling_interval=datetime.timedelta(seconds=30),
                    status_callback=None):
    """Wait for the specified job to be deleted.

    Polls the custom object API until fetching the job fails with
    NOT_FOUND.

    Args:
      client: K8s api client.
      namespace: namespace for the job.
      name: Name of the job.
      version: API version of the job resource to query.
      timeout: A datetime.timedelta; how long to wait for the job.
      polling_interval: A datetime.timedelta; how long to sleep between
        polls.
      status_callback: (Optional): Callable. If supplied this callable is
        invoked after we poll the job and it still exists. Callable takes
        a single argument which is the job.

    Returns:
      None, once the job can no longer be found.

    Raises:
      rest.ApiException: If fetching the job fails with any status other
        than NOT_FOUND.
      util.TimeoutError: If the job still exists and sleeping for another
        polling interval would go past the deadline.
    """
    crd_api = k8s_client.CustomObjectsApi(client)
    end_time = datetime.datetime.now() + timeout
    while True:
        try:
            results = crd_api.get_namespaced_custom_object(
                tf_job_client.TF_JOB_GROUP, version, namespace,
                tf_job_client.TF_JOB_PLURAL, name)
        except rest.ApiException as e:
            # NOT_FOUND is the success condition: the job is gone.
            if e.status == httplib.NOT_FOUND:
                return
            logging.exception("rest.ApiException thrown")
            raise

        if status_callback:
            status_callback(results)

        # Give up now if the next poll would happen after the deadline.
        if datetime.datetime.now() + polling_interval > end_time:
            raise util.TimeoutError(
                "Timeout waiting for job {0} in namespace {1} to be deleted.".
                format(name, namespace))

        # total_seconds() covers the whole interval; .seconds is only the
        # seconds component and is 0 for sub-second or whole-day intervals.
        time.sleep(polling_interval.total_seconds())
def wait_for_workflow(client,
                      namespace,
                      name,
                      timeout=datetime.timedelta(minutes=30),
                      polling_interval=datetime.timedelta(seconds=30),
                      status_callback=None):
    """Wait for the specified workflow to finish.

    The workflow counts as finished once its status phase is "Failed" or
    "Succeeded".

    Args:
      client: K8s api client.
      namespace: namespace for the workflow.
      name: Name of the workflow.
      timeout: A datetime.timedelta; how long to wait for the workflow.
      polling_interval: A datetime.timedelta; how long to sleep between
        polls.
      status_callback: (Optional): Callable. If supplied this callable is
        invoked after we poll the workflow. Callable takes a single
        argument which is the workflow object returned by the API.

    Returns:
      The workflow object as of the poll on which it was finished.

    Raises:
      util.TimeoutError: If the workflow is still unfinished and sleeping
        for another polling interval would go past the deadline.
    """
    crd_api = k8s_client.CustomObjectsApi(client)
    end_time = datetime.datetime.now() + timeout
    while True:
        results = crd_api.get_namespaced_custom_object(
            GROUP, VERSION, namespace, PLURAL, name)

        if status_callback:
            status_callback(results)

        # The status field may not be populated yet on a freshly created
        # workflow; treat a missing status/phase as "not finished yet"
        # instead of failing with KeyError.
        phase = results.get("status", {}).get("phase", "")
        if phase in ["Failed", "Succeeded"]:
            return results

        # Give up now if the next poll would happen after the deadline.
        if datetime.datetime.now() + polling_interval > end_time:
            raise util.TimeoutError(
                "Timeout waiting for workflow {0} in namespace {1} to finish.".
                format(name, namespace))

        # total_seconds() covers the whole interval; .seconds is only the
        # seconds component and is 0 for sub-second or whole-day intervals.
        time.sleep(polling_interval.total_seconds())