Exemplo n.º 1
0
def load_incluster_config_with_token(token: str):
    """Configure the Kubernetes client in-cluster, authenticating with *token*.

    The loader API only accepts a file path, so the token is written to a
    private temporary file; the standard in-cluster CA certificate is reused.

    Parameters
    ----------
    token:
        Bearer token to use for API-server authentication.
    """
    import tempfile

    # NamedTemporaryFile gives a unique, mode-0600 file instead of the
    # previous hard-coded '/tmp/token', which was predictable (symlink
    # attacks) and readable by other users on a shared host.
    with tempfile.NamedTemporaryFile('w', prefix='k8s-token-',
                                     delete=False) as token_file:
        token_file.write(token)
        token_filename = token_file.name

    loader = InClusterConfigLoader(
        token_filename=token_filename,
        cert_filename=SERVICE_CERT_FILENAME)
    loader.load_and_set()
Exemplo n.º 2
0
def start_job(definition):
    """Create a Kubernetes Job from *definition* in the configured namespace.

    Cluster credentials are loaded first: an external kubeconfig when
    ``settings.EXTERNAL_KUBECONF`` is set, otherwise the in-cluster service
    account (with paths re-rooted when running under telepresence).

    Parameters
    ----------
    definition:
        Job body accepted by ``BatchV1Api.create_namespaced_job``.
    """
    # Dropped a no-op ``.format()`` call — the string has no placeholders.
    print("deploying build baseimage job!")

    from kubernetes import client, config

    if settings.EXTERNAL_KUBECONF:
        config.load_kube_config('cluster.conf')
    else:
        if 'TELEPRESENCE_ROOT' in os.environ:
            # Telepresence bind-mounts the pod filesystem (including the
            # service-account secrets) under $TELEPRESENCE_ROOT, so the
            # default in-cluster paths must be re-rooted.
            from kubernetes.config.incluster_config import (
                SERVICE_CERT_FILENAME, SERVICE_TOKEN_FILENAME,
                InClusterConfigLoader)
            token_filename = Path(
                os.getenv('TELEPRESENCE_ROOT',
                          '/')) / Path(SERVICE_TOKEN_FILENAME).relative_to('/')
            cert_filename = Path(
                os.getenv('TELEPRESENCE_ROOT',
                          '/')) / Path(SERVICE_CERT_FILENAME).relative_to('/')

            InClusterConfigLoader(token_filename=token_filename,
                                  cert_filename=cert_filename).load_and_set()
        else:
            config.load_incluster_config()

    api = client.BatchV1Api()

    # create the resource
    api.create_namespaced_job(
        namespace=settings.NAMESPACE,
        body=definition,
    )
    print("Resource created")
Exemplo n.º 3
0
def run_job(instance):
    """Submit a Job for *instance*, then read it back and dump its details."""
    print("deploying job with {}!".format(instance))

    from kubernetes import client, config

    if settings.EXTERNAL_KUBECONF:
        # Explicit kubeconfig shipped alongside the application.
        config.load_kube_config('cluster.conf')
    elif 'TELEPRESENCE_ROOT' in os.environ:
        # Under telepresence the service-account files live beneath
        # $TELEPRESENCE_ROOT; re-root the default in-cluster paths.
        from kubernetes.config.incluster_config import (
            SERVICE_CERT_FILENAME, SERVICE_TOKEN_FILENAME,
            InClusterConfigLoader)
        tele_root = Path(os.getenv('TELEPRESENCE_ROOT', '/'))
        InClusterConfigLoader(
            token_filename=tele_root /
            Path(SERVICE_TOKEN_FILENAME).relative_to('/'),
            cert_filename=tele_root /
            Path(SERVICE_CERT_FILENAME).relative_to('/'),
        ).load_and_set()
    else:
        config.load_incluster_config()

    batch_api = client.BatchV1Api()

    yaml_definition = get_instance_from_definition(instance)

    # Create the Job resource in the configured namespace.
    batch_api.create_namespaced_job(
        namespace=settings.NAMESPACE,
        body=yaml_definition,
    )
    print("Resource created")

    # Fetch the freshly created Job (named after the instance id).
    print("getting logs:")
    resource = batch_api.read_namespaced_job(
        name=str(instance.id),
        namespace=settings.NAMESPACE,
    )
    print("got logs?")
    print("Resources details:")
    pprint(resource)
Exemplo n.º 4
0
def main():
    """Entry point: configure the Kubernetes client, then watch namespaces."""
    log('Started NamespaceWatcher')

    import os

    # Token/cert paths supplied via the environment take precedence over the
    # default in-cluster discovery.
    if 'SERVICE_TOKEN_FILENAME' in os.environ:
        loader = InClusterConfigLoader(
            token_filename=os.environ.get('SERVICE_TOKEN_FILENAME'),
            cert_filename=os.environ.get('SERVICE_CERT_FILENAME'))
        loader.load_and_set()
    else:
        config.load_incluster_config()

    # NOTE(review): TLS verification is disabled globally here — confirm this
    # is intentional for the target cluster.
    client.configuration.verify_ssl = False

    watcher = NamespaceWatcher(watch)
    watcher.start()
Exemplo n.º 5
0
def main():
    """Entry point: configure the Kubernetes client, then prune old jobs."""
    log('Started JobPrunner')

    import os

    # Jobs older than this many hours become prune candidates (default 24h).
    deadline_hours = int(os.environ.get('DEADLINE_HOURS', 24))
    log('Job deadline {}h'.format(deadline_hours))

    # Token/cert paths supplied via the environment take precedence over the
    # default in-cluster discovery.
    if 'SERVICE_TOKEN_FILENAME' in os.environ:
        loader = InClusterConfigLoader(
            token_filename=os.environ.get('SERVICE_TOKEN_FILENAME'),
            cert_filename=os.environ.get('SERVICE_CERT_FILENAME'))
        loader.load_and_set()
    else:
        config.load_incluster_config()

    # NOTE(review): TLS verification is disabled globally here — confirm this
    # is intentional for the target cluster.
    client.configuration.verify_ssl = False

    prunner = JobPrunner(watch=watch.Watch())
    prunner.start(deadline_hours * 60 * 60)  # deadline in seconds
Exemplo n.º 6
0
def get_logs(experiment):
    """Return the log text of the first pod spawned by *experiment*'s Job."""
    from kubernetes import client, config

    if settings.EXTERNAL_KUBECONF:
        # Explicit kubeconfig shipped alongside the application.
        config.load_kube_config('cluster.conf')
    elif 'TELEPRESENCE_ROOT' in os.environ:
        # Under telepresence the service-account files live beneath
        # $TELEPRESENCE_ROOT; re-root the default in-cluster paths.
        from kubernetes.config.incluster_config import (
            SERVICE_CERT_FILENAME, SERVICE_TOKEN_FILENAME,
            InClusterConfigLoader)
        tele_root = Path(os.getenv('TELEPRESENCE_ROOT', '/'))
        InClusterConfigLoader(
            token_filename=tele_root /
            Path(SERVICE_TOKEN_FILENAME).relative_to('/'),
            cert_filename=tele_root /
            Path(SERVICE_CERT_FILENAME).relative_to('/'),
        ).load_and_set()
    else:
        config.load_incluster_config()

    # Look up the Job (named after the experiment id) to recover the
    # 'job-name' label that its pods carry.
    job = client.BatchV1Api().read_namespaced_job(
        name=str(experiment.id),
        namespace=settings.NAMESPACE,
    )
    print("getting job name:")
    job_name = job.metadata.labels['job-name']

    # Find the Job's pods and read the first one's log.
    core_api = client.CoreV1Api()
    pods = core_api.list_namespaced_pod(
        namespace=settings.NAMESPACE,
        label_selector='job-name={}'.format(job_name))

    return core_api.read_namespaced_pod_log(
        name=pods.items[0].metadata.name,
        namespace=settings.NAMESPACE)
Exemplo n.º 7
0
def __generate_and_apply_configmaps(project):
    """(Re)create the ``<project>-dockerfile`` ConfigMap for *project*.

    Any existing ConfigMap of the same name is deleted first (failures are
    logged and ignored so a missing map does not abort the refresh), then a
    new one holding the project's Dockerfile contents is created.

    Parameters
    ----------
    project:
        Object exposing ``name`` and ``environment.dockerfile``.
    """
    from kubernetes import client, config
    from kubernetes.client.rest import ApiException
    from pprint import pprint

    if settings.EXTERNAL_KUBECONF:
        config.load_kube_config('cluster.conf')
    else:
        # adjust k8s service account paths if running inside telepresence
        if 'TELEPRESENCE_ROOT' in os.environ:
            from kubernetes.config.incluster_config import (
                SERVICE_CERT_FILENAME, SERVICE_TOKEN_FILENAME,
                InClusterConfigLoader)
            token_filename = Path(
                os.getenv('TELEPRESENCE_ROOT',
                          '/')) / Path(SERVICE_TOKEN_FILENAME).relative_to('/')
            cert_filename = Path(
                os.getenv('TELEPRESENCE_ROOT',
                          '/')) / Path(SERVICE_CERT_FILENAME).relative_to('/')

            InClusterConfigLoader(token_filename=token_filename,
                                  cert_filename=cert_filename).load_and_set()
        else:
            config.load_incluster_config()

    api = client.CoreV1Api()

    # Best-effort delete of the previous ConfigMap; a 404 on the first run is
    # expected and only logged.
    try:
        api_response = api.delete_namespaced_config_map(
            namespace=settings.NAMESPACE,
            name="{}-dockerfile".format(project.name),
        )
        pprint(api_response)

    except ApiException as e:
        print(
            "Exception when calling CoreV1Api->delete_namespaced_config_map: %s\n"
            % e)

    metadata = client.V1ObjectMeta(
        name="{}-dockerfile".format(project.name),
        namespace=settings.NAMESPACE,
    )
    # Instantiate the configmap object
    configmap = client.V1ConfigMap(
        api_version="v1",
        kind="ConfigMap",
        data=dict(Dockerfile=str(project.environment.dockerfile)),
        metadata=metadata)
    try:
        api_response = api.create_namespaced_config_map(
            namespace=settings.NAMESPACE,
            body=configmap,
            # 'pretty' must be the string 'true' to request pretty-printed
            # server output; the previous value 'pretty_example' was a
            # placeholder copied from the generated API docs and was ignored.
            pretty='true',
        )
        pprint(api_response)

    except ApiException as e:
        print(
            "Exception when calling CoreV1Api->create_namespaced_config_map: %s\n"
            % e)
Exemplo n.º 8
0
def main(token_file=None, cert_file=None, config_file=None):
    """Drive the Tensorflow image-build pipeline.

    Loads in-cluster credentials, reads the BUILD_MAP (env var, falling back
    to *config_file*), generates ImageStream/BuildConfig/Job templates per
    python/OS combination, ensures the ImageStreams exist, then runs the
    quota/resource/event worker threads until the work queue drains.

    Parameters
    ----------
    token_file, cert_file:
        Optional overrides for the in-cluster token and CA certificate paths.
    config_file:
        JSON file used as the BUILD_MAP fallback when the env var is empty.

    Raises
    ------
    Exception
        When no BUILD_MAP can be loaded or an ImageStream cannot be created.
    """
    logging.basicConfig(level=logging.DEBUG,
                        format="(%(threadName)-9s) %(message)s")
    logging.getLogger("urllib3").setLevel(logging.CRITICAL)

    # "0"/"1" env flag -> bool; controls TLS verification to the master.
    kubernetes_verify_tls = get_param("KUBERNETES_VERIFY_TLS", None, "0")
    kubernetes_verify_tls = bool(int(kubernetes_verify_tls))

    # Load in-cluster configuration that is exposed by OpenShift/k8s configuration.
    InClusterConfigLoader(
        token_filename=_get_incluster_token_file(token_file=token_file),
        cert_filename=_get_incluster_ca_file(ca_file=cert_file),
        environ=os.environ,
    ).load_and_set()

    # We need to explicitly set whether we want to verify SSL/TLS connection to the master.
    configuration = client.Configuration()
    configuration.verify_ssl = kubernetes_verify_tls

    ocp_client = DynamicClient(client.ApiClient(configuration=configuration))
    host = client.Configuration().host
    api_key = client.Configuration().api_key
    namespace = get_namespace()

    plugin = TensorflowBuildPlugin()

    # TODO may use config.json or use CRD
    # Load BUILD_MAP: the env var wins; an empty map falls back to the file.
    build_map = os.getenv(ENV_BUILD_MAP, "{}")
    build_map = json.loads(build_map)
    if str(build_map) == "{}":
        build_map = load_json_file(config_file)

    if not build_map:
        raise Exception("No BUILD_MAP loaded.Nothing todo")

    imagestream_list = []
    buildconfig_list = []
    job_list = []
    object_map = {}
    object_map.update(plugin.get_labels_dict())

    # Process BUILD_MAP: one ImageStream + BuildConfig + Job per
    # (python version, OS version) combination.
    for py_version, os_details in build_map.items():
        for os_version, image_details in os_details.items():
            try:
                application_build_name = "tf-{}-build-image-{}".format(
                    os_version.lower(), py_version.replace(".", ""))
                application_name = "tf-{}-build-job-{}".format(
                    os_version.lower(), py_version.replace(".", ""))
                builder_imagestream = "{}:{}".format(application_build_name,
                                                     DEFAULT_IMAGE_VERSION)
                nb_python_ver = py_version
                docker_file_path = "Dockerfile.{}".format(os_version.lower())
                logging.debug(
                    "-------------------VARIABLES-------------------------")
                logging.debug("APPLICATION_BUILD_NAME: {}".format(
                    application_build_name))
                logging.debug("APPLICATION_NAME: {}".format(application_name))
                logging.debug(
                    "BUILDER_IMAGESTREAM: {}".format(builder_imagestream))
                logging.debug("PYTHON VERSION: {}".format(nb_python_ver))
                logging.debug("DOCKERFILE: {}".format(docker_file_path))
                for var_key, var_val in image_details.items():
                    logging.debug("{}: {}".format(var_key, var_val))
                logging.debug(
                    "-----------------------------------------------------")
                imagestream_template = plugin.fill_imagestream_template(
                    ims_name=application_build_name)
                imagestream_list.append({
                    "kind": "ImageStream",
                    "object": imagestream_template,
                    "trigger_count": 0,
                    "retrigger": False,
                })
                job_template = plugin.fill_job_template1(
                    application_name=application_name,
                    builder_imagestream=builder_imagestream,
                    nb_python_ver=nb_python_ver,
                    image_details=image_details,
                )
                object_map[application_name] = job_template
                job_list.append(job_template)
                build_template = plugin.fill_buildconfig_template1(
                    build_name=application_build_name,
                    docker_file_path=docker_file_path,
                    nb_python_ver=nb_python_ver,
                    image_details=image_details,
                )
                object_map[application_build_name] = build_template

                buildconfig_list.append({
                    "kind": "BuildConfig",
                    "object": build_template,
                    "trigger_count": 0,
                    "retrigger": False,
                    "application_name": application_name,
                    "builder_imagestream": builder_imagestream,
                    "nb_python_ver": nb_python_ver,
                })
            except Exception as e:
                # The previous call logging.error("Exception: ", e) passed
                # 'e' as a %-format argument with no placeholder, which
                # errors inside the logging machinery; use a lazy %s instead.
                logging.error("Exception: %s", e)
                logging.error(
                    "Error in Tensorflow Build or Job trigger! Please refer the above log, Starting the next "
                    "one in queue!")

    # Ensure every ImageStream exists before builds are triggered.
    for ims in imagestream_list:
        ims_name = ims["object"]["metadata"]["name"]

        ims_exist, ims_response = get_imagestream(
            req_url=host,
            req_headers=get_header(api_key),
            namespace=namespace,
            imagestream_name=ims_name,
        )
        if not ims_exist:
            generated_img = create_imagestream(
                req_url=host,
                req_headers=get_header(api_key),
                namespace=namespace,
                imagestream=ims["object"],
            )
            if not generated_img:
                raise Exception(
                    "Image could not be generated for {}".format(ims_name))

    quota_event = threading.Condition()
    done_event = threading.Event()
    global_count = ResourceCounter()
    task_q = Queue(maxsize=1000)
    bloom = BloomFilter(10000, 0.001)

    # TODO TFBuildConfig  OpenBlasBuildConfig, numpy
    for y in buildconfig_list:
        task_q.put(y)

    logging.debug("Q size {}".format(task_q.qsize()))
    quota_name = get_param("QUOTA_NAME", None, DEFAULT_QUOTA_NAME)
    # Worker threads: quota watcher, resource creator, event consumer.
    quota_thread = threading.Thread(
        name="quota-thread",
        target=quota_check,
        args=(quota_name, quota_event, done_event, task_q, global_count),
    )

    resource_thread = threading.Thread(
        name="resource-thread",
        target=create_resource,
        args=(quota_event, done_event, task_q, global_count, object_map),
    )

    event_thread = threading.Thread(
        name="event-thread",
        target=event_loop,
        args=("events", bloom, object_map, task_q, global_count),
    )

    event_thread.start()
    time.sleep(3)  # give the event consumer a head start before producing
    quota_thread.start()
    resource_thread.start()
    event_thread.join()
    resource_thread.join()
    quota_thread.join()
    logging.debug("END")
Exemplo n.º 9
0
    SERVICE_TOKEN_FILENAME,
    InClusterConfigLoader,
)

from .. import config

# adjust k8s service account paths if running inside telepresence
# (when TELEPRESENCE_ROOT is set, the pod filesystem — including the
# service-account secrets — is mounted under that root instead of '/')
tele_root = Path(os.getenv("TELEPRESENCE_ROOT", "/"))

token_filename = tele_root / Path(SERVICE_TOKEN_FILENAME).relative_to("/")
cert_filename = tele_root / Path(SERVICE_CERT_FILENAME).relative_to("/")
namespace_path = tele_root / Path(
    "var/run/secrets/kubernetes.io/serviceaccount/namespace")

try:
    InClusterConfigLoader(token_filename=token_filename,
                          cert_filename=cert_filename).load_and_set()
    v1 = client.CoreV1Api()
except ConfigException:
    # Not fatal: v1 stays None and callers must check it before use.
    v1 = None
    warnings.warn("Unable to configure the kubernetes client.")

try:
    with open(namespace_path, "rt") as f:
        kubernetes_namespace = f.read()
except FileNotFoundError:
    # Outside a cluster there is no namespace file; fall back to "".
    kubernetes_namespace = ""
    warnings.warn(
        "No k8s service account found - not running inside a kubernetes cluster?"
    )

def load_k8s_config():
    """Configure the Kubernetes client from the default in-cluster
    service-account token and CA certificate paths."""
    loader = InClusterConfigLoader(token_filename=SERVICE_TOKEN_FILENAME,
                                   cert_filename=SERVICE_CERT_FILENAME)
    loader.load_and_set()