Пример #1
0
def load_example_project(deployment_label, assembly="37"):
    """Load the public 1kg example project.

    Picks the example VCF that matches the requested genome build and passes
    it, together with the example PED file, to load_project().

    Args:
        deployment_label (string): deployment label; forwarded to
            check_kubernetes_context() and load_project().
        assembly (string): reference genome version - either "37" or "38"

    Raises:
        ValueError: if assembly is neither "37" nor "38", or if no 'seqr' pod
            is found.
    """

    # Validate the argument before touching the cluster so that a bad value
    # fails immediately instead of after the kubectl lookups.
    if assembly == "37":
        vcf_filename = "1kg.vep.vcf.gz"
    elif assembly == "38":
        vcf_filename = "1kg.liftover.GRCh38.vep.vcf.gz"
    else:
        raise ValueError("Unexpected assembly: %s" % str(assembly))

    check_kubernetes_context(deployment_label)

    # The pod name itself is not needed here; this is only an early check that
    # a 'seqr' pod exists.
    if not _get_pod_name('seqr'):
        raise ValueError(
            "No 'seqr' pods found. Is the kubectl environment configured in this terminal? and has this type of pod been deployed?"
        )

    bucket_url = "https://storage.googleapis.com/seqr-public/test-projects/1kg-exomes"

    load_project(deployment_label,
                 project_id="1kg",
                 assembly=assembly,
                 vcf="%s/%s" % (bucket_url, vcf_filename),
                 ped="%s/1kg.ped" % bucket_url)
Пример #2
0
def load_project_cassandra(
        deployment_label,
        project_id="1kg",
        assembly="37",
        vds_path="gs://seqr-hail/annotated/Cohen.1kpart.vds"):
    """Export VDS to cassandra

    Drops and recreates the `seqr` keyspace and `seqr.seqr` table in the
    cassandra pod, submits the export script to hail, then prints the
    resulting row count.

    Args:
        deployment_label (string): "local", "gcloud-dev", or "gcloud-prod"
        project_id (string): project id (not referenced by this function)
        assembly (string): reference genome version - either "37" or "38"
        vds_path (string): path of annotated VDS
    """

    check_kubernetes_context(deployment_label)

    settings = retrieve_settings(deployment_label)

    _init_dataproc_cluster(settings, assembly=assembly)

    pod_name = lookup_json_path("pods",
                                labels={'name': 'cassandra'},
                                json_path=".items[0].metadata.name")

    # A here-document is only terminated by its delimiter when the delimiter
    # starts at column 0, so the commands are assembled line by line instead
    # of being written as an indented triple-quoted block.
    cqlsh_heredoc = "kubectl exec -i %s -- cqlsh <<EOF" % pod_name

    recreate_schema = "\n".join([
        cqlsh_heredoc,
        "DROP KEYSPACE IF EXISTS seqr;",
        "CREATE KEYSPACE seqr WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}  AND durable_writes = true;",
        "",
        "CREATE TABLE seqr.seqr (chrom text, start int, ref text, alt text, dataset_5fid text, PRIMARY KEY (chrom, start, ref, alt, dataset_5fid));",
        "EOF",
        "",
    ])
    _run_shell_command(recreate_schema).wait()

    script_path = "scripts/loading/export_to_cass.py"
    node_name = lookup_json_path("pods",
                                 labels={'name': 'cassandra'},
                                 json_path=".items[0].spec.nodeName")

    _submit_to_hail(settings, script_path, node_name, vds_path)

    # Report how many rows ended up in the table.
    count_rows = "\n".join([
        cqlsh_heredoc,
        "select count(*) from seqr.seqr;",
        "EOF",
        "",
    ])
    _run_shell_command(count_rows).wait()
Пример #3
0
def load_reference_data(deployment_label, assembly="37"):
    """Load reference data

    Downloads and unpacks the seqr resource bundle inside the 'seqr' pod,
    runs the manage.py reference-data commands, then restarts the server.

    Args:
        deployment_label (string): deployment label; forwarded to
            check_kubernetes_context().
        assembly (string): reference genome version - either "37" or "38"
            (not referenced by this function)

    Raises:
        ValueError: if no 'seqr' pod is found.
    """

    check_kubernetes_context(deployment_label)

    pod_name = _get_pod_name('seqr')
    if not pod_name:
        raise ValueError(
            "No 'seqr' pods found. Is the kubectl environment configured in this terminal? and has this type of pod been deployed?"
        )

    # Commands run inside the pod, strictly one after another. Each one is
    # waited on (including the initial mkdir) so a later step never starts
    # before the step it depends on has finished.
    pod_commands = [
        "mkdir -p /data/reference_data/",
        "wget -N https://storage.googleapis.com/seqr-public/reference-data/seqr-resource-bundle.tar.gz -P /data/reference_data/",
        "tar -xzf /data/reference_data/seqr-resource-bundle.tar.gz --directory /data/reference_data/",
        "python2.7 -u manage.py load_resources",
        "python2.7 -u manage.py update_gencode",
        "python2.7 -u manage.py update_human_phenotype_ontology",
        "python2.7 -u manage.py update_omim",
        "/usr/local/bin/restart_server.sh",
    ]
    for pod_command in pod_commands:
        _run_shell_command(
            "kubectl exec %s -- %s" % (pod_name, pod_command)).wait()
Пример #4
0
def load_project_solr(deployment_label,
                      project_id="1kg",
                      assembly="37",
                      vds_path="gs://seqr-hail/annotated/Cohen.1kpart.vds"):
    """Export VDS to solr

    Deletes and recreates the `seqr_noref` collection in the solr pod,
    submits the export script to hail, then queries the collection.

    Args:
        deployment_label (string): "local", "gcloud-dev", or "gcloud-prod"
        project_id (string): project id (not referenced by this function)
        assembly (string): reference genome version - either "37" or "38"
        vds_path (string): path of annotated VDS
    """

    check_kubernetes_context(deployment_label)

    settings = retrieve_settings(deployment_label)
    _init_dataproc_cluster(settings, assembly=assembly)

    pod_name = lookup_json_path("pods",
                                labels={'name': 'solr'},
                                json_path=".items[0].metadata.name")
    substitutions = {"pod_name": pod_name}

    # Start from an empty collection; "|| true" keeps going when the delete
    # or create step reports a failure.
    reset_collection_templates = (
        "kubectl exec %(pod_name)s -- su -c '/usr/local/solr-6.4.2/bin/solr delete -c seqr_noref' solr || true",
        "kubectl exec %(pod_name)s -- su -c '/usr/local/solr-6.4.2/bin/solr create_collection -c seqr_noref' solr || true",
    )
    for template in reset_collection_templates:
        _run_shell_command(template % substitutions).wait()

    node_name = lookup_json_path("pods",
                                 labels={'name': 'solr'},
                                 json_path=".items[0].spec.nodeName")
    _submit_to_hail(settings, "scripts/loading/export_to_solr.py", node_name,
                    vds_path)

    # Query the collection so the result of the export is visible.
    select_all_template = "kubectl exec -i %(pod_name)s -- /bin/bash -c \"curl 'http://localhost:30002/solr/seqr_noref/select?indent=on&q=*:*&wt=json'\""
    _run_shell_command(select_all_template % substitutions).wait()
Пример #5
0
def load_allele_frequencies(deployment_label, assembly="37"):
    """Load ExAC and 1kg allele frequency datasets. These are larger and take longer to load than other reference data

    Downloads both VCFs into /data/reference_data/ inside the 'seqr' pod and
    then runs `manage.py load_reference`.

    Args:
        deployment_label (string): deployment label; forwarded to
            check_kubernetes_context().
        assembly (string): reference genome version - either "37" or "38"
            (not referenced by this function)

    Raises:
        ValueError: if no 'seqr' pod is found.
    """

    check_kubernetes_context(deployment_label)

    pod_name = _get_pod_name('seqr')
    if not pod_name:
        missing_pod_message = "No 'seqr' pods found. Is the kubectl environment configured in this terminal? and has this type of pod been deployed?"
        raise ValueError(missing_pod_message)

    substitutions = {"pod_name": pod_name}
    command_templates = (
        "kubectl exec %(pod_name)s -- wget -N http://seqr.broadinstitute.org/static/bundle/ExAC.r0.3.sites.vep.popmax.clinvar.vcf.gz -P /data/reference_data/",
        "kubectl exec %(pod_name)s -- wget -N http://seqr.broadinstitute.org/static/bundle/ALL.wgs.phase3_shapeit2_mvncall_integrated_v5a.20130502.sites.decomposed.with_popmax.vcf.gz -P /data/reference_data/",
        "kubectl exec %(pod_name)s -- python2.7 -u manage.py load_reference",
    )

    # Run the steps one at a time, waiting for each to finish.
    for template in command_templates:
        _run_shell_command(template % substitutions).wait()
Пример #6
0
def deploy(deployment_label,
           component=None,
           output_dir=None,
           other_settings=None):
    """Render configs into an output directory and run the deployment scripts.

    Side effects: creates output_dir (logs, configs, docker and secrets
    copies), attaches a log-file handler to the module logger, and changes
    the process working directory to output_dir before running the scripts.

    Args:
        deployment_label (string): one of the DEPLOYMENT_LABELS  (eg. "local", or "gcloud")
        component (string): optionally specifies one of the components from the DEPLOYABLE_COMPONENTS lists (eg. "postgres" or "phenotips").
            If this is set to None, all DEPLOYABLE_COMPONENTS will be deployed in sequence.
        output_dir (string): path of directory where to put deployment logs and rendered config files.
            Defaults to "deployments/<timestamp>_<deployment_label>". The docker and secrets
            sub-directories are created with shutil.copytree, which refuses to copy into a
            destination that already exists.
        other_settings (dict): a dictionary of other key-value pairs for use during deployment
    """

    check_kubernetes_context(deployment_label)

    timestamp = time.strftime("%Y-%m-%d_%H:%M:%S", time.localtime())
    output_dir = output_dir or "deployments/%s_%s" % (timestamp,
                                                      deployment_label)

    # configure logging output
    log_dir = os.path.join(output_dir, "logs")
    if not os.path.isdir(log_dir):
        os.makedirs(log_dir)
    log_file_path = os.path.join(log_dir, "deploy.log")
    # FileHandler owns the file object (and closes it on logging shutdown)
    # instead of leaving a bare open() handle that nothing ever closes.
    log_file_handler = logging.FileHandler(log_file_path, mode="w")
    log_file_handler.setLevel(logging.INFO)
    logger.addHandler(log_file_handler)
    logger.info("Starting log file: %s", log_file_path)

    # parse config files
    settings = retrieve_settings(deployment_label)
    settings.update(other_settings or {})

    # Add an upper-case alias for every key. Iterate over a snapshot because
    # inserting keys while iterating the live items() view raises
    # RuntimeError on Python 3.
    for key, value in list(settings.items()):
        key = key.upper()
        settings[key] = value
        logger.info("%s = %s", key, value)

    # copy configs, templates and scripts to output directory
    output_base_dir = os.path.join(output_dir, 'configs')
    input_base_dir = os.path.join(BASE_DIR, 'templates')
    template_paths = glob.glob("templates/*/*.*") + glob.glob(
        "templates/*/*/*.*")
    for file_path in template_paths:
        file_path = file_path.replace('templates/', '')
        render(template_processor, input_base_dir, file_path, settings,
               output_base_dir)

    for file_path in glob.glob("scripts/*.sh"):
        render(script_processor, BASE_DIR, file_path, settings, output_dir)

    # shutil.copy() into a missing directory would silently create a *file*
    # named "configs", so make sure the directory is there first.
    if not os.path.isdir(output_base_dir):
        os.makedirs(output_base_dir)

    for file_path in glob.glob("scripts/*.py") + glob.glob("config/*.yaml"):
        shutil.copy(file_path, output_base_dir)

    # copy docker directory to output directory
    docker_src_dir = os.path.join(BASE_DIR, "../docker/")
    docker_dest_dir = os.path.join(output_dir, "docker")
    logger.info("Copying %s to %s", docker_src_dir, docker_dest_dir)
    shutil.copytree(docker_src_dir, docker_dest_dir)

    # copy secrets directory
    secrets_subdir = "secrets/%s" % deployment_label
    secrets_src_dir = os.path.join(BASE_DIR, secrets_subdir)
    secrets_dest_dir = os.path.join(output_dir, secrets_subdir)
    logger.info("Copying %s to %s", secrets_src_dir, secrets_dest_dir)
    shutil.copytree(secrets_src_dir, secrets_dest_dir)

    # deploy
    if component:
        # always keep the 'init' scripts, plus any script that mentions the
        # requested component (with either '-' or '_' as the word separator)
        deployment_scripts = [
            s for s in DEPLOYMENT_SCRIPTS if 'init' in s or component in s
            or component.replace('-', '_') in s
        ]
    elif deployment_label == "gcloud-dev":
        deployment_scripts = DEPLOYMENT_SCRIPTS
    else:
        # every other deployment skips these components
        excluded_keywords = ("solr", "cassandra", "database_api")
        deployment_scripts = [
            s for s in DEPLOYMENT_SCRIPTS
            if not any(k in s for k in excluded_keywords)
        ]

    os.chdir(output_dir)
    logger.info("Switched to %s", output_dir)

    for path in deployment_scripts:
        logger.info("=========================")
        _run_shell_command(path, verbose=True).wait()
Пример #7
0
def load_project(deployment_label,
                 project_id="1kg",
                 assembly="37",
                 vcf=None,
                 ped=None):
    """Download a project's VCF and PED into the 'seqr' pod and load them.

    Runs `wget` for both files inside the pod, then a fixed sequence of
    manage.py commands, waiting for each command before starting the next.

    Args:
        deployment_label (string): deployment label; forwarded to
            check_kubernetes_context().
        project_id (string): project id
        assembly (string): reference genome version - either "37" or "38"
            (not referenced by this function)
        vcf (string): VCF path
        ped (string): PED path

    Raises:
        ValueError: if project_id, vcf or ped is empty, or if no 'seqr' pod
            is found.
    """

    # Check the arguments before talking to the cluster so that a missing
    # value fails immediately instead of after the kubectl lookups.
    if not project_id:
        raise ValueError("project_id not specified")
    if not vcf:
        raise ValueError("vcf not specified")
    if not ped:
        raise ValueError("ped not specified")

    check_kubernetes_context(deployment_label)

    pod_name = _get_pod_name('seqr')
    if not pod_name:
        raise ValueError(
            "No 'seqr' pods found. Is the kubectl environment configured in this terminal? and has this type of pod been deployed?"
        )

    # wget saves each file under its basename, which is the name the
    # manage.py commands below refer to.
    substitutions = {
        "project_id": project_id,
        "vcf_filename": os.path.basename(vcf),
        "ped_filename": os.path.basename(ped),
    }

    for url in (vcf, ped):
        _run_shell_command(
            "kubectl exec %s -- wget -N %s" % (pod_name, url)).wait()

    # manage.py sub-commands, in the order they must run.
    manage_py_templates = [
        "add_project '%(project_id)s' '%(project_id)s'",
        "add_individuals_to_project '%(project_id)s' --ped '%(ped_filename)s'",
        "add_vcf_to_project --clear '%(project_id)s' '%(vcf_filename)s'",
        "add_project_to_phenotips '%(project_id)s' '%(project_id)s'",
        "add_individuals_to_phenotips '%(project_id)s' --ped '%(ped_filename)s'",
        "generate_pedigree_images -f '%(project_id)s'",
        "add_default_tags '%(project_id)s'",
        "load_project '%(project_id)s'",
        "load_project_datastore '%(project_id)s'",
    ]
    for template in manage_py_templates:
        manage_py_args = template % substitutions
        _run_shell_command("kubectl exec %s -- python2.7 -u manage.py %s" %
                           (pod_name, manage_py_args)).wait()