示例#1
0
def reset_database(database=None, deployment_target=None):
    """Runs kubectl commands to delete and reset the given database(s).

    Args:
        database (list): one or more database labels - "seqrdb", "phenotipsdb", "mongodb".
            If omitted (or None), no database is touched.
        deployment_target (string): value from DEPLOYMENT_TARGETS - eg. "minikube", "gcloud-dev", etc.
    """
    # None replaces the former mutable [] default; both mean "no databases requested".
    database = [] if database is None else database

    # (label, psql user, postgres database name) for each postgres-backed database,
    # in the order they are reset.
    postgres_databases = [
        ("seqrdb", "postgres", "seqrdb"),
        ("phenotipsdb", "xwiki", "xwiki"),
    ]
    for label, psql_user, db_name in postgres_databases:
        if label not in database:
            continue

        postgres_pod_name = get_pod_name("postgres", deployment_target=deployment_target)
        if not postgres_pod_name:
            logger.error("postgres pod must be running")
            continue

        # A missing database is not an error here: it is (re)created right after the drop.
        run_in_pod(
            postgres_pod_name,
            "psql -U %s postgres -c 'drop database %s'" % (psql_user, db_name),
            errors_to_ignore=["does not exist"])
        run_in_pod(
            postgres_pod_name,
            "psql -U %s postgres -c 'create database %s'" % (psql_user, db_name))

    if "mongodb" in database:
        mongo_pod_name = get_pod_name("mongo", deployment_target=deployment_target)
        if not mongo_pod_name:
            logger.error("mongo pod must be running")
        else:
            run_in_pod(mongo_pod_name, "mongo datastore --eval 'db.dropDatabase()'")
示例#2
0
def reset_database(database=None, deployment_target=None):
    """Runs kubectl commands to delete and reset the given database(s).

    Args:
        database (list): one or more database labels - "seqrdb", "phenotipsdb".
            If omitted (or None), no database is touched.
        deployment_target (string): value from DEPLOYMENT_TARGETS - eg. "minikube", "gcloud-dev", etc.
    """
    # None replaces the former mutable [] default; both mean "no databases requested".
    database = [] if database is None else database

    # (label, psql user, postgres database name), in the order they are reset.
    postgres_databases = [
        ("seqrdb", "postgres", "seqrdb"),
        ("phenotipsdb", "xwiki", "xwiki"),
    ]
    for label, psql_user, db_name in postgres_databases:
        if label not in database:
            continue

        postgres_pod_name = get_pod_name("postgres",
                                         deployment_target=deployment_target)
        if not postgres_pod_name:
            logger.error("postgres pod must be running")
            continue

        # A missing database is not an error here: it is (re)created right after the drop.
        run_in_pod(postgres_pod_name,
                   "psql -U %s postgres -c 'drop database %s'" %
                   (psql_user, db_name),
                   errors_to_ignore=["does not exist"])
        run_in_pod(postgres_pod_name,
                   "psql -U %s postgres -c 'create database %s'" %
                   (psql_user, db_name))
def deploy_seqr(settings):
    """Builds (optionally) and deploys the seqr pod, preparing the seqrdb postgres database first.

    Args:
        settings (dict): deployment settings. Keys read here: "BUILD_DOCKER_IMAGES",
            "SEQR_SERVICE_PORT", "SEQR_UI_DEV_PORT", "ONLY_PUSH_TO_REGISTRY", "DEPLOY_TO",
            "DELETE_BEFORE_DEPLOY", and optionally "RESTORE_SEQR_DB_FROM_BACKUP" and "RESET_DB".
    """
    print_separator("seqr")

    if settings["BUILD_DOCKER_IMAGES"]:
        git_hash = run("git log -1 --pretty=%h", errors_to_ignore=["Not a git repository"])
        # tag the image with the current commit when one is available
        image_tag_suffix = "" if git_hash is None else (":" + git_hash.strip())

        docker_build_args = [
            "--build-arg SEQR_SERVICE_PORT=%s" % settings["SEQR_SERVICE_PORT"],
            "--build-arg SEQR_UI_DEV_PORT=%s" % settings["SEQR_UI_DEV_PORT"],
            "-f deploy/docker/seqr/Dockerfile",
            "-t %(DOCKER_IMAGE_NAME)s" + image_tag_suffix,
        ]
        docker_build("seqr", settings, docker_build_args)

    if settings["ONLY_PUSH_TO_REGISTRY"]:
        return

    backup_path = settings.get("RESTORE_SEQR_DB_FROM_BACKUP")
    reset_db = settings.get("RESET_DB")

    target = settings["DEPLOY_TO"]
    postgres_pod = get_pod_name("postgres", deployment_target=target)

    if settings["DELETE_BEFORE_DEPLOY"]:
        delete_pod("seqr", settings)
    elif reset_db or backup_path:
        # stop the running seqr server before its database is dropped
        seqr_pod = get_pod_name('seqr', deployment_target=target)
        if seqr_pod:
            sleep_until_pod_is_running("seqr", deployment_target=target)

            run_in_pod(seqr_pod, "/usr/local/bin/stop_server.sh", verbose=True)

    drop_command = "psql -U postgres postgres -c 'drop database seqrdb'"
    create_command = "psql -U postgres postgres -c 'create database seqrdb'"

    if reset_db:
        run_in_pod(postgres_pod, drop_command, errors_to_ignore=["does not exist"], verbose=True)

    if not backup_path:
        # no backup to restore - just make sure the database exists
        run_in_pod(postgres_pod, create_command, errors_to_ignore=["already exists"], verbose=True)
    else:
        names = {
            "restore_seqr_db_from_backup": backup_path,
            "postgres_pod_name": postgres_pod,
        }
        run_in_pod(postgres_pod, drop_command, errors_to_ignore=["does not exist"], verbose=True)
        run_in_pod(postgres_pod, create_command, verbose=True)
        # copy the backup into the pod, restore from it, then remove the copy
        run("kubectl cp '%(restore_seqr_db_from_backup)s' %(postgres_pod_name)s:/root/$(basename %(restore_seqr_db_from_backup)s)" % names, verbose=True)
        run_in_pod(postgres_pod, "/root/restore_database_backup.sh postgres seqrdb /root/$(basename %(restore_seqr_db_from_backup)s)" % names, verbose=True)
        run_in_pod(postgres_pod, "rm /root/$(basename %(restore_seqr_db_from_backup)s)" % names, verbose=True)

    deploy_pod("seqr", settings, wait_until_pod_is_ready=True)
示例#4
0
def deploy_seqr(settings):
    """Builds (optionally) and deploys the seqr pod, preparing the seqrdb postgres database first.

    Args:
        settings (dict): deployment settings. Keys read here: "BUILD_DOCKER_IMAGES",
            "SEQR_SERVICE_PORT", "SEQR_UI_DEV_PORT", "ONLY_PUSH_TO_REGISTRY", "DEPLOY_TO",
            "DELETE_BEFORE_DEPLOY", and optionally "RESTORE_SEQR_DB_FROM_BACKUP" and "RESET_DB".
    """
    print_separator("seqr")

    if settings["BUILD_DOCKER_IMAGES"]:
        git_hash = run("git log -1 --pretty=%h", errors_to_ignore=["Not a git repository"])
        # tag the image with the current commit when one is available
        image_tag_suffix = "" if git_hash is None else (":" + git_hash.strip())

        docker_build_args = [
            "--build-arg SEQR_SERVICE_PORT=%s" % settings["SEQR_SERVICE_PORT"],
            "--build-arg SEQR_UI_DEV_PORT=%s" % settings["SEQR_UI_DEV_PORT"],
            "-f deploy/docker/seqr/Dockerfile",
            "-t %(DOCKER_IMAGE_NAME)s" + image_tag_suffix,
        ]
        docker_build("seqr", settings, docker_build_args)

    if settings["ONLY_PUSH_TO_REGISTRY"]:
        return

    backup_path = settings.get("RESTORE_SEQR_DB_FROM_BACKUP")
    reset_db = settings.get("RESET_DB")

    target = settings["DEPLOY_TO"]
    postgres_pod = get_pod_name("postgres", deployment_target=target)

    if settings["DELETE_BEFORE_DEPLOY"]:
        delete_pod("seqr", settings)
    elif reset_db or backup_path:
        # stop the running seqr server before its database is dropped
        seqr_pod = get_pod_name('seqr', deployment_target=target)
        if seqr_pod:
            sleep_until_pod_is_running("seqr", deployment_target=target)

            run_in_pod(seqr_pod, "/usr/local/bin/stop_server.sh", verbose=True)

    drop_command = "psql -U postgres postgres -c 'drop database seqrdb'"
    create_command = "psql -U postgres postgres -c 'create database seqrdb'"

    if reset_db:
        run_in_pod(postgres_pod, drop_command, errors_to_ignore=["does not exist"], verbose=True)

    if not backup_path:
        # no backup to restore - just make sure the database exists
        run_in_pod(postgres_pod, create_command, errors_to_ignore=["already exists"], verbose=True)
    else:
        names = {
            "restore_seqr_db_from_backup": backup_path,
            "postgres_pod_name": postgres_pod,
        }
        run_in_pod(postgres_pod, drop_command, errors_to_ignore=["does not exist"], verbose=True)
        run_in_pod(postgres_pod, create_command, verbose=True)
        # copy the backup into the pod, restore from it, then remove the copy
        run("kubectl cp '%(restore_seqr_db_from_backup)s' %(postgres_pod_name)s:/root/$(basename %(restore_seqr_db_from_backup)s)" % names, verbose=True)
        run_in_pod(postgres_pod, "/root/restore_database_backup.sh postgres seqrdb /root/$(basename %(restore_seqr_db_from_backup)s)" % names, verbose=True)
        run_in_pod(postgres_pod, "rm /root/$(basename %(restore_seqr_db_from_backup)s)" % names, verbose=True)

    deploy_pod("seqr", settings, wait_until_pod_is_ready=True)
示例#5
0
def delete_component(component, deployment_target=None):
    """Deletes the kubernetes deployment, service and pod objects of a component via kubectl.

    Args:
        component (string): component to delete (eg. 'phenotips' or 'nginx').
        deployment_target (string): value from DEPLOYMENT_TARGETS - eg. "minikube", "gcloud-dev", etc.
    """
    # component-specific objects that aren't covered by the generic deletes below
    if component == "cockpit":
        run("kubectl delete rc cockpit", errors_to_ignore=["not found"])
    elif component == "es-data":
        run("kubectl delete StatefulSet es-data", errors_to_ignore=["not found"])
    elif component == "nginx":
        run("kubectl delete -f https://raw.githubusercontent.com/kubernetes/ingress-nginx/master/deploy/mandatory.yaml")

    for command_template in (
            "kubectl delete deployments %(component)s",
            "kubectl delete services %(component)s",
    ):
        run(command_template % {"component": component}, errors_to_ignore=["not found"])

    running_pod = get_pod_name(component, deployment_target=deployment_target)
    if running_pod:
        run("kubectl delete pods %(pod_name)s" % {"pod_name": running_pod}, errors_to_ignore=["not found"])

        logger.info("waiting for \"%s\" to exit Running status" % component)
        # poll every 5 seconds until the pod is no longer reported as running
        while is_pod_running(component, deployment_target):
            time.sleep(5)

    # print services and pods status
    for status_command in ("kubectl get services", "kubectl get pods"):
        run(status_command, verbose=True)
示例#6
0
def delete_component(component, deployment_target=None):
    """Deletes the kubernetes deployment, service and pod objects of a component via kubectl.

    Args:
        component (string): component to delete (eg. 'phenotips' or 'nginx').
        deployment_target (string): value from DEPLOYMENT_TARGETS - eg. "minikube", "gcloud-dev", etc.
    """
    # component-specific objects that aren't covered by the generic deletes below
    if component == "cockpit":
        run("kubectl delete rc cockpit", errors_to_ignore=["not found"])
    elif component == "es-data":
        run("kubectl delete StatefulSet es-data", errors_to_ignore=["not found"])
    elif component == "nginx":
        run("kubectl delete -f https://raw.githubusercontent.com/kubernetes/ingress-nginx/master/deploy/mandatory.yaml")

    for command_template in (
            "kubectl delete deployments %(component)s",
            "kubectl delete services %(component)s",
    ):
        run(command_template % {"component": component},
            errors_to_ignore=["not found"])

    running_pod = get_pod_name(component, deployment_target=deployment_target)
    if running_pod:
        run("kubectl delete pods %(pod_name)s" % {"pod_name": running_pod},
            errors_to_ignore=["not found"])

        logger.info("waiting for \"%s\" to exit Running status" % component)
        # poll every 5 seconds until the pod is no longer reported as running
        while is_pod_running(component, deployment_target):
            time.sleep(5)

    # print services and pods status
    for status_command in ("kubectl get services", "kubectl get pods"):
        run(status_command, verbose=True)
示例#7
0
def update_reference_data(deployment_target):
    """DEPRECATED. Downloads the seqr resource bundle into the seqr pod and loads legacy resources.

    Args:
        deployment_target (string): value from DEPLOYMENT_TARGETS - eg. "minikube", "gcloud-dev", etc.

    Raises:
        ValueError: if no 'seqr' pod is found.
    """

    check_kubernetes_context(deployment_target)

    seqr_pod = get_pod_name('seqr', deployment_target=deployment_target)
    if not seqr_pod:
        raise ValueError(
            "No 'seqr' pods found. Is the kubectl environment configured in this terminal? and have either of these pods been deployed?"
        )

    # update_all_reference_data is intentionally not run here because it is not loaded from settings backup
    #run_in_pod(pod_name, "python2.7 -u manage.py update_all_reference_data --omim-key '$OMIM_KEY'" % locals(), verbose=True, print_command=True)

    # (command, extra run_in_pod keyword args), executed in this order inside the seqr pod
    pod_commands = [
        # fetch and unpack the resource bundle, then remove the downloaded archive
        ("mkdir -p /seqr/data/reference_data", {}),
        ("wget -N https://storage.googleapis.com/seqr-reference-data/seqr-resource-bundle.tar.gz -O /seqr/data/reference_data/seqr-resource-bundle.tar.gz", {}),
        ("tar xzf /seqr/data/reference_data/seqr-resource-bundle.tar.gz -C /seqr/data/reference_data", {"verbose": True}),
        ("rm /seqr/data/reference_data/seqr-resource-bundle.tar.gz", {}),
        # load legacy resources
        ("python -u manage.py load_resources", {"verbose": True}),
        ("python -u manage.py load_omim", {"verbose": True}),
    ]
    for command, options in pod_commands:
        run_in_pod(seqr_pod, command, **options)
示例#8
0
def copy_files_to_or_from_pod(component,
                              deployment_target,
                              source_path,
                              dest_path,
                              direction=1):
    """Copy file(s) to or from the given component using `kubectl cp`.

    Args:
        component (string): component label (eg. "postgres")
        deployment_target (string): value from DEPLOYMENT_TARGETS - eg. "minikube", "gcloud-dev", etc.
        source_path (string): source file path. If copying files to the component, it should be a local path. Otherwise, it should be a file path inside the component pod.
        dest_path (string): destination file path. If copying files from the component, it should be a local path. Otherwise, it should be a file path inside the component pod.
        direction (int): If > 0 the file will be copied to the pod. If < 0, then it will be copied from the pod.
            If 0, neither path is prefixed with the pod name.

    Raises:
        ValueError: if no pod is found for the given component.
    """
    full_pod_name = get_pod_name(component,
                                 deployment_target=deployment_target)
    if not full_pod_name:
        # The message previously formatted %(pod_name)s from locals(), which has no such
        # key, so a KeyError was raised instead of this ValueError.
        raise ValueError(
            "No '%s' pods found. Is the kubectl environment configured in this terminal? and has this type of pod been deployed?"
            % (component, ))

    # kubectl cp addresses a path inside a pod as "<pod name>:<path>"
    if direction < 0:  # copy from pod
        source_path = "%s:%s" % (full_pod_name, source_path)
    elif direction > 0:  # copy to pod
        dest_path = "%s:%s" % (full_pod_name, dest_path)

    run("kubectl cp '%(source_path)s' '%(dest_path)s'" % {
        "source_path": source_path,
        "dest_path": dest_path,
    })
示例#9
0
def port_forward(component_port_pairs=None,
                 deployment_target=None,
                 wait=True,
                 open_browser=False,
                 use_kubectl_proxy=False):
    """Executes kubectl command to set up port forwarding between localhost and the given pod.
    While this is running, connecting to localhost:<port> will be the same as connecting to that port
    from the pod's internal network.

    Args:
        component_port_pairs (list): 2-tuple(s) containing keyword to use for looking up a kubernetes
            pod, along with the port to forward to that pod (eg. ('phenotips', 8080)).
            If omitted (or None), nothing is forwarded.
        deployment_target (string): value from DEPLOYMENT_TARGETS - eg. "gcloud-dev"
        wait (bool): Whether to block indefinitely as long as the forwarding process is running.
        open_browser (bool): If component_port_pairs includes components that have an http server
            (eg. "seqr" or "phenotips"), then open a web browser window to the forwarded port.
        use_kubectl_proxy (bool): Whether to use kubectl proxy instead of kubectl port-forward
            (see https://kubernetes.io/docs/tasks/access-application-cluster/access-cluster/#manually-constructing-apiserver-proxy-urls)
    Returns:
        (list): process objects (as returned by run_in_background) for the kubectl processes.
    """
    # Imported locally so this function doesn't depend on the module's import block.
    import webbrowser

    procs = []
    # None replaces the former mutable [] default; both mean "nothing to forward".
    for component_label, port in component_port_pairs or ():
        if component_label == "kube-scan":
            continue  # See https://github.com/octarinesec/kube-scan for how to connect to the kube-scan pod.

        wait_until_pod_is_running(component_label, deployment_target)

        logger.info("Forwarding port %s for %s" % (port, component_label))
        pod_name = get_pod_name(component_label,
                                deployment_target=deployment_target)

        # NOTE(review): with use_kubectl_proxy and several components, a proxy on port 8001 is
        # started once per component - confirm whether that is intended.
        if use_kubectl_proxy:
            command = "kubectl proxy --port 8001"
        else:
            command = "kubectl port-forward %s %s" % (pod_name, port)

        p = run_in_background(command)

        if open_browser and component_label in COMPONENTS_TO_OPEN_IN_BROWSER:
            if use_kubectl_proxy:
                url = "http://localhost:8001/api/v1/namespaces/default/services/%s:%s/proxy/" % (
                    component_label, port)
            else:
                url = "http://localhost:%s" % port

            # give the background kubectl process a moment to start listening
            time.sleep(3)
            # webbrowser works across platforms, unlike the macOS-only `open` shell command
            webbrowser.open(url)

        procs.append(p)

    if wait:
        wait_for(procs)

    return procs
示例#10
0
def troubleshoot_component(component, deployment_target):
    """Prints the YAML description of the given component's pod using `kubectl get pods -o yaml`.

    Args:
        component (string): component label (eg. "postgres")
        deployment_target (string): value from DEPLOYMENT_TARGETS - eg. "minikube", "gcloud-dev", etc.
    """
    describe_pod_command = "kubectl get pods -o yaml %(pod_name)s" % {
        "pod_name": get_pod_name(component, deployment_target=deployment_target),
    }

    run(describe_pod_command, verbose=True)
示例#11
0
def load_dataset(deployment_target, project_name, genome_version, sample_type, dataset_type, vcf, memory_to_use=None, cpu_limit=None, **kwargs):
    """Load dataset into elasticsearch by running the hail loading pipeline in the 'pipeline-runner' pod.

    Args:
        deployment_target (string): value from DEPLOYMENT_TARGETS - eg. "minikube", "gcloud-dev", etc.
            For "minikube" the vcf is first copied or downloaded into the pod and hail is run locally;
            for any other target the pipeline is submitted via run_hail_on_dataproc.sh.
        project_name (string): passed to the pipeline as --project-guid
        genome_version (string): passed to the pipeline as --genome-version
        sample_type (string): passed to the pipeline as --sample-type
        dataset_type (string): passed to the pipeline as --dataset-type
        vcf (string): local file path, http(s) url, or gs: path of the vcf
        memory_to_use (string): minikube only - spark driver and executor memory (eg. "4G").
            Defaults to half of (total system memory - 6Gb), but at least 1G.
        cpu_limit (int): minikube only - value for --cpu-limit. Defaults to half the cpu count, but at least 1.
        **kwargs: each entry with a non-None value is appended to the command as --key-name 'value'
            (key lower-cased, underscores replaced by dashes).
    """

    pod_name = get_pod_name('pipeline-runner', deployment_target=deployment_target)

    # run load command
    additional_load_command_args = "  ".join("--%s '%s'" % (key.lower().replace("_", "-"), value) for key, value in kwargs.items() if value is not None)

    if deployment_target == "minikube":
        vcf_name = os.path.basename(vcf)
        path_in_pod = "/data/{}".format(vcf_name)
        if os.path.isfile(vcf):
            run("kubectl cp '%(vcf)s' '%(pod_name)s:%(path_in_pod)s'" % locals()) # if local file path, copy file into pod
        elif vcf.startswith("http"):
            run_in_pod(pod_name, "wget -N %(vcf)s -O %(path_in_pod)s" % locals())
        elif vcf.startswith("gs:"):
            run_in_pod(pod_name, "gsutil cp -n %(vcf)s %(path_in_pod)s" % locals())
        vcf = path_in_pod

        if memory_to_use is None:
            total_memory = psutil.virtual_memory().total - 6*10**9  # leave 6Gb for other processes
            # Divide available memory evenly between spark driver & executor. Floor division keeps
            # this a whole number of Gb on Python 3 too (true division produced values like "5.3G"),
            # and the max() guards against a negative value on machines with less than 6Gb.
            memory_to_use = "%sG" % max(1, total_memory // 2 // 10**9)
        if cpu_limit is None:
            # cpu_count() may return None when the count can't be determined; floor division keeps
            # --cpu-limit an integer on Python 3.
            cpu_limit = max(1, (psutil.cpu_count() or 1) // 2)
        load_command = """/hail-elasticsearch-pipelines/run_hail_locally.sh \
            --driver-memory %(memory_to_use)s \
            --executor-memory %(memory_to_use)s \
            hail_scripts/v01/load_dataset_to_es.py \
                --cpu-limit %(cpu_limit)s \
                --genome-version %(genome_version)s \
                --project-guid %(project_name)s \
                --sample-type %(sample_type)s \
                --dataset-type %(dataset_type)s \
                --skip-validation \
                --exclude-hgmd \
                --vep-block-size 100 \
                --es-block-size 10 \
                --num-shards 1 \
                --max-samples-per-index 99 \
                %(additional_load_command_args)s \
                %(vcf)s
        """ % locals()

    else:
        load_command = """/hail-elasticsearch-pipelines/run_hail_on_dataproc.sh \
            hail_scripts/v01/load_dataset_to_es.py \
                --genome-version %(genome_version)s \
                --project-guid %(project_name)s \
                --sample-type %(sample_type)s \
                --dataset-type %(dataset_type)s \
                %(additional_load_command_args)s \
                %(vcf)s
        """ % locals()

    run_in_pod(pod_name, load_command, verbose=True)
示例#12
0
def troubleshoot_component(component, deployment_target):
    """Prints the YAML description of the given component's pod using `kubectl get pods -o yaml`.

    Args:
        component (string): component label (eg. "postgres")
        deployment_target (string): value from DEPLOYMENT_TARGETS - eg. "minikube", "gcloud-dev", etc.
    """
    describe_pod_command = "kubectl get pods -o yaml %(pod_name)s" % {
        "pod_name": get_pod_name(component, deployment_target=deployment_target),
    }

    run(describe_pod_command, verbose=True)
示例#13
0
def print_log(components,
              deployment_target,
              enable_stream_log,
              previous=False,
              wait=True):
    """Executes kubernetes command to print logs for the given pod.

    Args:
        components (list): one or more kubernetes pod labels (eg. 'phenotips' or 'nginx').
            If more than one is specified, logs will be printed from all components in parallel.
        deployment_target (string): value from DEPLOYMENT_TARGETS - eg. "gcloud-dev", etc.
        enable_stream_log (bool): whether to continuously stream the log instead of just printing
            the log up to now.
        previous (bool): Prints logs from a previous instance of the container. This is useful for debugging pods that
            don't start or immediately enter crash-loop.
        wait (bool): If False, this method will return without waiting for the log streaming process
            to finish printing all logs.

    Returns:
        (list): process objects (as returned by run_in_background) for the kubectl logs processes.
    """
    stream_arg = "-f" if enable_stream_log else ""
    previous_flag = "--previous" if previous else ""

    def log_process_output(proc):
        # The process is passed in explicitly: a closure over the loop variable could end up
        # reading a later component's process once the loop rebinds it.
        for line in iter(proc.stdout.readline, ''):
            logger.info(line.strip('\n'))

    procs = []
    for component_label in components:
        if component_label == "kube-scan":
            continue  # See https://github.com/octarinesec/kube-scan for how to connect to the kube-scan pod.

        # --previous targets a container instance that is no longer running, so don't wait for it
        if not previous:
            wait_until_pod_is_running(component_label, deployment_target)

        pod_name = get_pod_name(component_label,
                                deployment_target=deployment_target)

        p = run_in_background(
            "kubectl logs %s %s %s" % (stream_arg, previous_flag, pod_name),
            print_command=True)

        # forward each process's output to the logger from its own thread
        Thread(target=log_process_output, args=(p, )).start()
        procs.append(p)

    if wait:
        wait_for(procs)

    return procs
示例#14
0
def load_example_project(deployment_target,
                         genome_version="37",
                         cpu_limit=None,
                         start_with_step=None):
    """Load example project: creates the "1kg" project in the seqr pod and loads its example vcf.

    Args:
        deployment_target (string): value from DEPLOYMENT_TARGETS - eg. "minikube", "gcloud-dev", etc.
        genome_version (string): reference genome version - either "37" or "38"
        cpu_limit: forwarded unchanged to load_dataset
        start_with_step: forwarded unchanged to load_dataset

    Raises:
        ValueError: if genome_version is not "37" or "38", or if no 'seqr' pod is found.
    """

    # Validate the arguments before anything is run, so that a bad genome_version doesn't
    # leave a project created with no dataset loaded into it.
    if genome_version == "37":
        vcf_filename = "1kg.vcf.gz"
    elif genome_version == "38":
        vcf_filename = "1kg.liftover.GRCh38.vep.vcf.gz"
    else:
        raise ValueError("Unexpected genome_version: %s" % (genome_version, ))

    project_name = "1kg"

    check_kubernetes_context(deployment_target)

    pod_name = get_pod_name('seqr', deployment_target=deployment_target)
    if not pod_name:
        raise ValueError(
            "No 'seqr' pod found. Is the kubectl environment configured in this terminal? and have either of these pods been deployed?"
        )

    # download the pedigree file into the pod; create_project reads it below
    run_in_pod(
        pod_name,
        "wget -N https://storage.googleapis.com/seqr-reference-data/test-projects/1kg.ped"
    )

    # TODO call APIs instead?
    run_in_pod(
        pod_name,
        "python2.7 -u -m manage create_project -p '1kg.ped' '%s'" %
        (project_name, ),
        verbose=True)

    load_dataset(
        deployment_target,
        project_name=project_name,
        genome_version=genome_version,
        sample_type="WES",
        dataset_type="VARIANTS",
        cpu_limit=cpu_limit,
        start_with_step=start_with_step,
        vcf="https://storage.googleapis.com/seqr-reference-data/test-projects/%s"
        % (vcf_filename, ))
示例#15
0
def port_forward(component_port_pairs=None, deployment_target=None, wait=True, open_browser=False, use_kubectl_proxy=False):
    """Executes kubectl command to set up port forwarding between localhost and the given pod.
    While this is running, connecting to localhost:<port> will be the same as connecting to that port
    from the pod's internal network.

    Args:
        component_port_pairs (list): 2-tuple(s) containing keyword to use for looking up a kubernetes
            pod, along with the port to forward to that pod (eg. ('mongo', 27017), or ('phenotips', 8080)).
            If omitted (or None), nothing is forwarded.
        deployment_target (string): value from DEPLOYMENT_TARGETS - eg. "minikube", "gcloud-dev", etc.
        wait (bool): Whether to block indefinitely as long as the forwarding process is running.
        open_browser (bool): If component_port_pairs includes components that have an http server
            (eg. "seqr" or "phenotips"), then open a web browser window to the forwarded port.
        use_kubectl_proxy (bool): Whether to use kubectl proxy instead of kubectl port-forward
            (see https://kubernetes.io/docs/tasks/access-application-cluster/access-cluster/#manually-constructing-apiserver-proxy-urls)
    Returns:
        (list): process objects (as returned by run_in_background) for the kubectl processes.
    """
    # Imported locally so this function doesn't depend on the module's import block.
    import webbrowser

    procs = []
    # None replaces the former mutable [] default; both mean "nothing to forward".
    for component_label, port in component_port_pairs or ():
        wait_until_pod_is_running(component_label, deployment_target)

        logger.info("Forwarding port %s for %s" % (port, component_label))
        pod_name = get_pod_name(component_label, deployment_target=deployment_target)

        # NOTE(review): with use_kubectl_proxy and several components, a proxy on port 8001 is
        # started once per component - confirm whether that is intended.
        if use_kubectl_proxy:
            command = "kubectl proxy --port 8001"
        else:
            command = "kubectl port-forward %s %s" % (pod_name, port)

        p = run_in_background(command)

        if open_browser and component_label in COMPONENTS_TO_OPEN_IN_BROWSER:
            if use_kubectl_proxy:
                url = "http://localhost:8001/api/v1/namespaces/default/services/%s:%s/proxy/" % (component_label, port)
            else:
                url = "http://localhost:%s" % port

            # give the background kubectl process a moment to start listening
            time.sleep(3)
            # webbrowser works across platforms, unlike the macOS-only `open` shell command
            webbrowser.open(url)

        procs.append(p)

    if wait:
        wait_for(procs)

    return procs
示例#16
0
def copy_files_to_or_from_pod(component, deployment_target, source_path, dest_path, direction=1):
    """Copy file(s) to or from the given component using `kubectl cp`.

    Args:
        component (string): component label (eg. "postgres")
        deployment_target (string): value from DEPLOYMENT_TARGETS - eg. "minikube", "gcloud-dev", etc.
        source_path (string): source file path. If copying files to the component, it should be a local path. Otherwise, it should be a file path inside the component pod.
        dest_path (string): destination file path. If copying files from the component, it should be a local path. Otherwise, it should be a file path inside the component pod.
        direction (int): If > 0 the file will be copied to the pod. If < 0, then it will be copied from the pod.
            If 0, neither path is prefixed with the pod name.

    Raises:
        ValueError: if no pod is found for the given component.
    """
    full_pod_name = get_pod_name(component, deployment_target=deployment_target)
    if not full_pod_name:
        # The message previously formatted %(pod_name)s from locals(), which has no such
        # key, so a KeyError was raised instead of this ValueError.
        raise ValueError("No '%s' pods found. Is the kubectl environment configured in this terminal? and has this type of pod been deployed?" % (component,))

    # kubectl cp addresses a path inside a pod as "<pod name>:<path>"
    if direction < 0:  # copy from pod
        source_path = "%s:%s" % (full_pod_name, source_path)
    elif direction > 0: # copy to pod
        dest_path = "%s:%s" % (full_pod_name, dest_path)

    run("kubectl cp '%(source_path)s' '%(dest_path)s'" % {"source_path": source_path, "dest_path": dest_path})
示例#17
0
def print_log(components, deployment_target, enable_stream_log, previous=False, wait=True):
    """Executes kubernetes command to print logs for the given pod.

    Args:
        components (list): one or more kubernetes pod labels (eg. 'phenotips' or 'nginx').
            If more than one is specified, logs will be printed from all components in parallel.
        deployment_target (string): value from DEPLOYMENT_TARGETS - eg. "minikube", "gcloud-dev", etc.
        enable_stream_log (bool): whether to continuously stream the log instead of just printing
            the log up to now.
        previous (bool): Prints logs from a previous instance of the container. This is useful for debugging pods that
            don't start or immediately enter crash-loop.
        wait (bool): If False, this method will return without waiting for the log streaming process
            to finish printing all logs.

    Returns:
        (list): process objects (as returned by run_in_background) for the kubectl logs processes.
    """
    stream_arg = "-f" if enable_stream_log else ""
    previous_flag = "--previous" if previous else ""

    def log_process_output(proc):
        # The process is passed in explicitly: a closure over the loop variable could end up
        # reading a later component's process once the loop rebinds it.
        for line in iter(proc.stdout.readline, ''):
            logger.info(line.strip('\n'))

    procs = []
    for component_label in components:

        # --previous targets a container instance that is no longer running, so don't wait for it
        if not previous:
            wait_until_pod_is_running(component_label, deployment_target)

        pod_name = get_pod_name(component_label, deployment_target=deployment_target)

        p = run_in_background("kubectl logs %s %s %s" % (stream_arg, previous_flag, pod_name), print_command=True)

        # forward each process's output to the logger from its own thread
        Thread(target=log_process_output, args=(p,)).start()
        procs.append(p)

    if wait:
        wait_for(procs)

    return procs
示例#18
0
def load_example_project(deployment_target, genome_version="37", cpu_limit=None, start_with_step=None):
    """Load example project: creates the "1kg" project in the seqr pod and loads its example vcf.

    Args:
        deployment_target (string): value from DEPLOYMENT_TARGETS - eg. "minikube", "gcloud-dev", etc.
        genome_version (string): reference genome version - either "37" or "38"
        cpu_limit: forwarded unchanged to load_dataset
        start_with_step: forwarded unchanged to load_dataset

    Raises:
        ValueError: if genome_version is not "37" or "38", or if no 'seqr' pod is found.
    """

    # Validate the arguments before anything is run, so that a bad genome_version doesn't
    # leave a project created with no dataset loaded into it.
    if genome_version == "37":
        vcf_filename = "1kg.vcf.gz"
    elif genome_version == "38":
        vcf_filename = "1kg.liftover.GRCh38.vep.vcf.gz"
    else:
        raise ValueError("Unexpected genome_version: %s" % (genome_version,))

    project_name = "1kg"

    check_kubernetes_context(deployment_target)

    pod_name = get_pod_name('seqr', deployment_target=deployment_target)
    if not pod_name:
        raise ValueError("No 'seqr' pod found. Is the kubectl environment configured in this terminal? and have either of these pods been deployed?")

    # download the pedigree file into the pod; create_project reads it below
    run_in_pod(pod_name, "wget -N https://storage.googleapis.com/seqr-reference-data/test-projects/1kg.ped")

    # TODO call APIs instead?
    run_in_pod(pod_name, "python2.7 -u -m manage create_project -p '1kg.ped' '%s'" % (project_name,), verbose=True)

    load_dataset(
        deployment_target,
        project_name=project_name,
        genome_version=genome_version,
        sample_type="WES",
        dataset_type="VARIANTS",
        cpu_limit=cpu_limit,
        start_with_step=start_with_step,
        vcf="https://storage.googleapis.com/seqr-reference-data/test-projects/%s" % (vcf_filename,))
示例#19
0
def update_reference_data(deployment_target):
    """DEPRECATED. Load reference data into mongodb.

    Downloads and unpacks the seqr resource bundle inside the 'seqr' pod, then runs the
    legacy `load_resources` and `load_omim` management commands there.

    Args:
        deployment_target (string): value from DEPLOYMENT_TARGETS - eg. "minikube", "gcloud-dev", etc.

    Raises:
        ValueError: if no 'seqr' pod is found.
    """

    check_kubernetes_context(deployment_target)

    seqr_pod = get_pod_name('seqr', deployment_target=deployment_target)
    if not seqr_pod:
        raise ValueError("No 'seqr' pods found. Is the kubectl environment configured in this terminal? and have either of these pods been deployed?")

    # Not run here because this is not loaded from settings backup:
    #   python2.7 -u manage.py update_all_reference_data --omim-key '$OMIM_KEY'

    # Each step is (command, extra keyword arguments for run_in_pod), executed in order.
    steps = [
        # fetch and unpack the resource bundle, then remove the downloaded archive
        ("mkdir -p /seqr/data/reference_data", {}),
        ("wget -N https://storage.googleapis.com/seqr-reference-data/seqr-resource-bundle.tar.gz -O /seqr/data/reference_data/seqr-resource-bundle.tar.gz", {}),
        ("tar xzf /seqr/data/reference_data/seqr-resource-bundle.tar.gz -C /seqr/data/reference_data", {"verbose": True}),
        ("rm /seqr/data/reference_data/seqr-resource-bundle.tar.gz", {}),
        # load legacy resources
        ("python -u manage.py load_resources", {"verbose": True}),
        ("python -u manage.py load_omim", {"verbose": True}),
    ]
    for command, options in steps:
        run_in_pod(seqr_pod, command, **options)
示例#20
0
def deploy_phenotips(settings):
    """Build and deploy the 'phenotips' component and prepare its xwiki database in postgres.

    Steps, in order:
      1. Optionally delete the existing pod and drop the xwiki database.
      2. Create the xwiki role and database and grant privileges (skipped when only pushing
         to the registry).
      3. Build the docker image; stop here if "ONLY_PUSH_TO_REGISTRY" is set.
      4. Deploy the pod and request its web page a few times to trigger first-run
         initialization.
      5. Optionally restore the xwiki database from a backup file and redeploy the pod.

    Args:
        settings (dict): deployment settings. Keys read here:
            "PHENOTIPS_SERVICE_PORT", "DEPLOY_TO", "ONLY_PUSH_TO_REGISTRY" - always read;
            "DELETE_BEFORE_DEPLOY" - read only when neither "RESET_DB" nor
                "RESTORE_PHENOTIPS_DB_FROM_BACKUP" is set;
            "RESET_DB", "RESTORE_PHENOTIPS_DB_FROM_BACKUP" - optional.
    """
    print_separator("phenotips")

    phenotips_service_port = settings["PHENOTIPS_SERVICE_PORT"]
    restore_phenotips_db_from_backup = settings.get("RESTORE_PHENOTIPS_DB_FROM_BACKUP")
    reset_db = settings.get("RESET_DB")

    deployment_target = settings["DEPLOY_TO"]

    if reset_db or restore_phenotips_db_from_backup:
        delete_pod("phenotips", settings)
        run_in_pod("postgres", "psql -U postgres postgres -c 'drop database xwiki'",
            verbose=True,
            errors_to_ignore=["does not exist"],
            deployment_target=deployment_target,
        )
    elif settings["DELETE_BEFORE_DEPLOY"]:
        delete_pod("phenotips", settings)

    # init postgres
    if not settings["ONLY_PUSH_TO_REGISTRY"]:
        run_in_pod("postgres",
            "psql -U postgres postgres -c \"create role xwiki with CREATEDB LOGIN PASSWORD 'xwiki'\"",
            verbose=True,
            errors_to_ignore=["already exists"],
            deployment_target=deployment_target,
        )

        run_in_pod("postgres",
            "psql -U xwiki postgres -c 'create database xwiki'",
            verbose=True,
            errors_to_ignore=["already exists"],
            deployment_target=deployment_target,
        )

        # Pass deployment_target here as well, so this command uses the same lookup
        # arguments as the other postgres commands above.
        run_in_pod("postgres",
            "psql -U postgres postgres -c 'grant all privileges on database xwiki to xwiki'",
            deployment_target=deployment_target,
        )

    # build container
    docker_build("phenotips", settings, ["--build-arg PHENOTIPS_SERVICE_PORT=%s" % phenotips_service_port])

    if settings["ONLY_PUSH_TO_REGISTRY"]:
        return

    deploy_pod("phenotips", settings, wait_until_pod_is_ready=True)

    # opening the PhenoTips website for the 1st time triggers a final set of initialization
    # steps which take ~ 1 minute, so request the page with curl to trigger this
    init_command = "curl --verbose -L -u Admin:admin http://localhost:%s -o test.html" % (phenotips_service_port,)
    for i in range(0, 3):
        try:
            run_in_pod("phenotips", command=init_command, verbose=True)
        except Exception as e:
            # best-effort: a failed request is logged and the next attempt still runs
            logger.error(str(e))

        if i < 2:
            logger.info("Waiting for phenotips to start up...")
            time.sleep(10)

    if restore_phenotips_db_from_backup:
        delete_pod("phenotips", settings)

        postgres_pod_name = get_pod_name("postgres", deployment_target=deployment_target)

        # destination of the backup file inside the postgres pod; basename is left to the shell
        backup_path_in_pod = "/root/$(basename %s)" % (restore_phenotips_db_from_backup,)

        run("kubectl cp '%s' %s:%s" % (restore_phenotips_db_from_backup, postgres_pod_name, backup_path_in_pod), verbose=True)
        run_in_pod("postgres", "/root/restore_database_backup.sh  xwiki  xwiki  %s" % backup_path_in_pod, deployment_target=deployment_target, verbose=True)
        run_in_pod("postgres", "rm %s" % backup_path_in_pod, deployment_target=deployment_target, verbose=True)

        deploy_pod("phenotips", settings, wait_until_pod_is_ready=True)
示例#21
0
def load_dataset(deployment_target,
                 project_name,
                 genome_version,
                 sample_type,
                 dataset_type,
                 vcf,
                 memory_to_use=None,
                 cpu_limit=None,
                 **kwargs):
    """Load dataset into elasticsearch.

    Runs hail_scripts/v01/load_dataset_to_es.py from inside the 'pipeline-runner' pod:
    via run_hail_locally.sh when deployment_target is "minikube", and via
    run_hail_on_dataproc.sh for any other target.

    Args:
        deployment_target (string): deployment target label; "minikube" selects the local run.
        project_name (string): passed to the pipeline as --project-guid.
        genome_version (string): passed as --genome-version.
        sample_type (string): passed as --sample-type.
        dataset_type (string): passed as --dataset-type.
        vcf (string): VCF location. For "minikube", a local file is copied into the pod, an
            "http..." url is downloaded with wget and a "gs:..." url is copied with gsutil;
            the pipeline is then given the path /data/<basename of vcf> inside the pod.
            For other targets the value is passed through unchanged.
        memory_to_use (string): minikube only - value for --driver-memory and
            --executor-memory (eg. "4G"). Defaults to half of (total RAM - 6Gb), in whole
            gigabytes and never less than "1G".
        cpu_limit (int): minikube only - value for --cpu-limit. Defaults to half the CPU
            count reported by psutil, and never less than 1.
        **kwargs: each non-None item is appended to the command as --key-name 'value'
            (key lower-cased, underscores replaced by dashes).

    Raises:
        ValueError: if no 'pipeline-runner' pod is found.
    """

    pod_name = get_pod_name('pipeline-runner',
                            deployment_target=deployment_target)
    if not pod_name:
        # fail here rather than building kubectl commands around an empty pod name
        raise ValueError(
            "No 'pipeline-runner' pod found. Is the kubectl environment configured in this terminal? and has this pod been deployed?"
        )

    # run load command
    additional_load_command_args = "  ".join(
        "--%s '%s'" % (key.lower().replace("_", "-"), value)
        for key, value in kwargs.items() if value is not None)

    if deployment_target == "minikube":
        vcf_name = os.path.basename(vcf)
        path_in_pod = "/data/{}".format(vcf_name)
        if os.path.isfile(vcf):
            # if local file path, copy file into pod
            run("kubectl cp '%s' '%s:%s'" % (vcf, pod_name, path_in_pod))
        elif vcf.startswith("http"):
            run_in_pod(pod_name, "wget -N %s -O %s" % (vcf, path_in_pod))
        elif vcf.startswith("gs:"):
            run_in_pod(pod_name, "gsutil cp -n %s %s" % (vcf, path_in_pod))
        vcf = path_in_pod

        if memory_to_use is None:
            # leave 6Gb for other processes and divide the rest evenly between the spark
            # driver & executor. Floor division keeps this a whole number of gigabytes on
            # both python 2 and 3; the max() guards hosts with too little RAM.
            total_memory = psutil.virtual_memory().total - 6 * 10**9
            memory_to_use = "%dG" % max(1, total_memory // 2 // 10**9)

        if cpu_limit is None:
            # psutil.cpu_count() may return None when the count can't be determined
            cpu_limit = max(1, (psutil.cpu_count() or 1) // 2)

        command_template = """/hail-elasticsearch-pipelines/run_hail_locally.sh \
            --driver-memory %(memory_to_use)s \
            --executor-memory %(memory_to_use)s \
            hail_scripts/v01/load_dataset_to_es.py \
                --cpu-limit %(cpu_limit)s \
                --genome-version %(genome_version)s \
                --project-guid %(project_name)s \
                --sample-type %(sample_type)s \
                --dataset-type %(dataset_type)s \
                --skip-validation \
                --exclude-hgmd \
                --vep-block-size 100 \
                --es-block-size 10 \
                --num-shards 1 \
                --max-samples-per-index 99 \
                %(additional_load_command_args)s \
                %(vcf)s
        """

    else:
        command_template = """/hail-elasticsearch-pipelines/run_hail_on_dataproc.sh \
            hail_scripts/v01/load_dataset_to_es.py \
                --genome-version %(genome_version)s \
                --project-guid %(project_name)s \
                --sample-type %(sample_type)s \
                --dataset-type %(dataset_type)s \
                %(additional_load_command_args)s \
                %(vcf)s
        """

    # explicit mapping instead of locals(); keys a template doesn't use are simply ignored
    load_command = command_template % {
        "memory_to_use": memory_to_use,
        "cpu_limit": cpu_limit,
        "genome_version": genome_version,
        "project_name": project_name,
        "sample_type": sample_type,
        "dataset_type": dataset_type,
        "additional_load_command_args": additional_load_command_args,
        "vcf": vcf,
    }

    run_in_pod(pod_name, load_command, verbose=True)
示例#22
0
def deploy_phenotips(settings):
    """Build and deploy the 'phenotips' component and prepare its xwiki database in postgres.

    Depending on the settings, this deletes the existing pod and drops the xwiki database,
    creates the xwiki role and database, builds the docker image, deploys the pod, requests
    the PhenoTips page a few times to trigger its first-run initialization, and finally
    restores the xwiki database from a backup file and redeploys the pod.

    Args:
        settings (dict): deployment settings. Keys read here:
            "PHENOTIPS_SERVICE_PORT", "DEPLOY_TO", "ONLY_PUSH_TO_REGISTRY" - always read;
            "DELETE_BEFORE_DEPLOY" - read only when neither "RESET_DB" nor
                "RESTORE_PHENOTIPS_DB_FROM_BACKUP" is set;
            "RESET_DB", "RESTORE_PHENOTIPS_DB_FROM_BACKUP" - optional.
    """
    print_separator("phenotips")

    phenotips_service_port = settings["PHENOTIPS_SERVICE_PORT"]
    restore_phenotips_db_from_backup = settings.get(
        "RESTORE_PHENOTIPS_DB_FROM_BACKUP")
    reset_db = settings.get("RESET_DB")

    deployment_target = settings["DEPLOY_TO"]

    if reset_db or restore_phenotips_db_from_backup:
        delete_pod("phenotips", settings)
        run_in_pod(
            "postgres",
            "psql -U postgres postgres -c 'drop database xwiki'",
            verbose=True,
            errors_to_ignore=["does not exist"],
            deployment_target=deployment_target,
        )
    elif settings["DELETE_BEFORE_DEPLOY"]:
        delete_pod("phenotips", settings)

    # init postgres
    if not settings["ONLY_PUSH_TO_REGISTRY"]:
        run_in_pod(
            "postgres",
            "psql -U postgres postgres -c \"create role xwiki with CREATEDB LOGIN PASSWORD 'xwiki'\"",
            verbose=True,
            errors_to_ignore=["already exists"],
            deployment_target=deployment_target,
        )

        run_in_pod(
            "postgres",
            "psql -U xwiki postgres -c 'create database xwiki'",
            verbose=True,
            errors_to_ignore=["already exists"],
            deployment_target=deployment_target,
        )

        # deployment_target is passed here too, matching the other postgres commands in
        # this function.
        run_in_pod(
            "postgres",
            "psql -U postgres postgres -c 'grant all privileges on database xwiki to xwiki'",
            deployment_target=deployment_target,
        )

    # build container
    docker_build(
        "phenotips", settings,
        ["--build-arg PHENOTIPS_SERVICE_PORT=%s" % phenotips_service_port])

    if settings["ONLY_PUSH_TO_REGISTRY"]:
        return

    deploy_pod("phenotips", settings, wait_until_pod_is_ready=True)

    # opening the PhenoTips website for the 1st time triggers a final set of initialization
    # steps which take ~ 1 minute, so request the page with curl to trigger this
    warmup_command = (
        "curl --verbose -L -u Admin:admin http://localhost:%s -o test.html" %
        (phenotips_service_port, ))
    attempts = 3
    for attempt in range(attempts):
        try:
            run_in_pod("phenotips", command=warmup_command, verbose=True)
        except Exception as e:
            # best-effort: log the failure and carry on with the next attempt
            logger.error(str(e))

        # pause between attempts, but not after the last one
        if attempt < attempts - 1:
            logger.info("Waiting for phenotips to start up...")
            time.sleep(10)

    if restore_phenotips_db_from_backup:
        delete_pod("phenotips", settings)

        postgres_pod_name = get_pod_name("postgres",
                                         deployment_target=deployment_target)

        # where the backup file is placed inside the postgres pod; the basename is
        # resolved by the shell that runs each command
        backup_path_in_pod = ("/root/$(basename %s)" %
                              (restore_phenotips_db_from_backup, ))

        run("kubectl cp '%s' %s:%s" % (restore_phenotips_db_from_backup,
                                       postgres_pod_name, backup_path_in_pod),
            verbose=True)
        run_in_pod(
            "postgres",
            "/root/restore_database_backup.sh  xwiki  xwiki  %s" %
            backup_path_in_pod,
            deployment_target=deployment_target,
            verbose=True)
        run_in_pod(
            "postgres",
            "rm %s" % backup_path_in_pod,
            deployment_target=deployment_target,
            verbose=True)

        deploy_pod("phenotips", settings, wait_until_pod_is_ready=True)