def test_build_institute_no_sanger():
    """An institute built without recipients must not get a 'sanger_recipients' key."""
    # GIVEN only the two mandatory pieces of institute information
    minimal_info = {'internal_id': 'cust000', 'display_name': 'test'}

    # WHEN the institute is built from exactly those fields
    built = build_institute(**minimal_info)

    # THEN the optional recipients entry is absent from the result
    assert 'sanger_recipients' not in built
def test_build_institute(parsed_institute):
    """Check the id fields and recipient type of an institute built from parsed data."""
    expected_id = parsed_institute['institute_id']

    built = build_institute(
        internal_id=expected_id,
        display_name=parsed_institute['display_name'],
        sanger_recipients=parsed_institute['sanger_recipients'],
    )

    # Both id fields have to carry the parsed institute id
    assert built['internal_id'] == built['_id'] == expected_id
    assert isinstance(built['sanger_recipients'], list)
示例#3
0
def institute_obj(request, parsed_institute):
    """Build and return an institute from the parsed institute information.

    ``request`` is accepted but never read in the body.
    """
    # Print an empty line ahead of the log message
    print('')
    LOG.info('Building a institute')
    return build_institute(
        internal_id=parsed_institute['institute_id'],
        display_name=parsed_institute['display_name'],
        sanger_recipients=parsed_institute['sanger_recipients'],
    )
def test_build_institute(parsed_institute):
    """Build an institute from the parsed information and verify ids and recipients."""
    build_kwargs = dict(
        internal_id=parsed_institute['institute_id'],
        display_name=parsed_institute['display_name'],
        sanger_recipients=parsed_institute['sanger_recipients'],
    )
    institute = build_institute(**build_kwargs)

    # 'internal_id' and '_id' must both equal the parsed institute id
    assert institute['internal_id'] == institute['_id'] == parsed_institute['institute_id']
    # The recipients are expected to be stored as a list
    assert isinstance(institute['sanger_recipients'], list)
def test_build_institute(parsed_institute):
    """Verify that a built institute mirrors the parsed id and keeps recipients as a list."""
    wanted_id = parsed_institute["institute_id"]
    result = build_institute(
        internal_id=wanted_id,
        display_name=parsed_institute["display_name"],
        sanger_recipients=parsed_institute["sanger_recipients"],
    )

    # The chained comparison ties both id fields to the parsed id
    assert result["internal_id"] == result["_id"] == wanted_id
    assert isinstance(result["sanger_recipients"], list)
def test_build_institute_no_sanger():
    """Building without recipients must leave "sanger_recipients" out of the result."""
    # GIVEN institute information that carries no sanger recipients
    info = {"internal_id": "cust000", "display_name": "test"}

    # WHEN building the institute from the id and the display name only
    institute = build_institute(
        internal_id=info["internal_id"], display_name=info["display_name"]
    )

    # THEN no recipients key has been added
    assert "sanger_recipients" not in institute
示例#7
0
def institute_obj(request, parsed_institute):
    """Build an institute from parsed information and backdate its creation by one day.

    ``request`` is accepted but never read in the body.
    """
    print("")
    LOG.info("Building a institute")
    built = build_institute(
        internal_id=parsed_institute["institute_id"],
        display_name=parsed_institute["display_name"],
        sanger_recipients=parsed_institute["sanger_recipients"],
    )
    # Overwrite the creation time so it lies one day in the past
    one_day = datetime.timedelta(days=1)
    built["created_at"] = datetime.datetime.now() - one_day
    return built
def test_build_institute_no_sanger():
    """Without recipients the built institute has no 'sanger_recipients' entry."""
    # GIVEN an institute description that lacks sanger recipients
    internal_id, display_name = 'cust000', 'test'

    # WHEN the institute is built
    institute = build_institute(internal_id=internal_id, display_name=display_name)

    # THEN the recipients key is missing
    assert 'sanger_recipients' not in institute
示例#9
0
def load_institute(adapter, internal_id, display_name, sanger_recipients=None):
    """Build an institute and add it to the database through the adapter.

        Args:
            adapter(MongoAdapter)
            internal_id(str)
            display_name(str)
            sanger_recipients(list(email))
    """
    institute_fields = dict(
        internal_id=internal_id,
        display_name=display_name,
        sanger_recipients=sanger_recipients,
    )
    new_institute = build_institute(**institute_fields)

    adapter.add_institute(new_institute)
示例#10
0
def load_institute(adapter, internal_id, display_name, sanger_recipients=None):
    """Build an institute, log what is being loaded and add it to the database.

        Args:
            adapter(MongoAdapter)
            internal_id(str)
            display_name(str)
            sanger_recipients(list(email))
    """
    new_institute = build_institute(
        internal_id=internal_id,
        display_name=display_name,
        sanger_recipients=sanger_recipients,
    )

    # The message is fully formatted before it is handed to the logger
    message = "Loading institute {0} with display name {1} into database".format(
        internal_id, display_name
    )
    log.info(message)

    adapter.add_institute(new_institute)
示例#11
0
文件: setup.py 项目: terestahl/scout
def setup_scout(
    adapter,
    institute_id="cust000",
    user_name="Clark Kent",
    user_mail="*****@*****.**",
    api_key=None,
    demo=False,
    resource_files=None,
):
    """Set up a working scout instance from scratch.

    WARNING: every collection whose name does not start with "system" is
    dropped before anything is loaded.

    An institute and an admin user are inserted first. Then the gene,
    transcript, cytoband and HPO resources are loaded. Each resource is read
    from the path found in ``resource_files`` when that key is set, otherwise
    the corresponding ``fetch_*`` helper is called.

    Args:
        adapter: database adapter used for all inserts and loads
        institute_id(str): used as both internal id and display name
        user_name(str): name of the admin user
        user_mail(str): used as user id, user email and sanger recipient
        api_key(str): passed to fetch_mim_files when no genemap lines were
            read from file
        demo(bool): if True ``demo_files`` replaces ``resource_files`` and a
            gene panel plus a case are loaded at the end
        resource_files(dict): maps resource keys such as "hgnc_path" to paths
    """
    LOG.info("Check if there was a database, delete if existing")
    dropped_any = False
    for name in adapter.db.list_collection_names():
        if not name.startswith("system"):
            LOG.info("Deleting collection %s", name)
            adapter.db.drop_collection(name)
            dropped_any = True
    if dropped_any:
        LOG.info("Database deleted")

    # Institute whose display name equals its id
    new_institute = build_institute(
        internal_id=institute_id,
        display_name=institute_id,
        sanger_recipients=[user_mail],
    )
    adapter.add_institute(new_institute)

    # Admin user tied to that institute
    admin_user = {
        "_id": user_mail,
        "email": user_mail,
        "name": user_name,
        "roles": ["admin"],
        "institutes": [institute_id],
    }
    adapter.add_user(admin_user)

    # In demo mode the demo resources win over anything the caller passed
    resource_files = demo_files if demo else (resource_files or {})

    # OMIM lines: read from file only when both paths are given
    mim2gene_lines = genemap_lines = None
    mim2gene_path = resource_files.get("mim2gene_path")
    genemap_path = resource_files.get("genemap_path")
    if genemap_path and mim2gene_path:
        mim2gene_lines = list(get_file_handle(mim2gene_path))
        genemap_lines = list(get_file_handle(genemap_path))

    # Fall back to fetching the OMIM files when an api key is available
    if genemap_lines is None and api_key:
        try:
            mim_files = fetch_mim_files(api_key, mim2genes=True, genemap2=True)
        except Exception as err:
            LOG.warning(err)
            raise err
        mim2gene_lines = mim_files["mim2genes"]
        genemap_lines = mim_files["genemap2"]

    hpogenes_path = resource_files.get("hpogenes_path")
    hpo_gene_lines = (
        list(get_file_handle(hpogenes_path))
        if hpogenes_path
        else fetch_genes_to_hpo_to_disease()
    )

    hgnc_file = resource_files.get("hgnc_path")
    hgnc_lines = list(get_file_handle(hgnc_file)) if hgnc_file else fetch_hgnc()

    exac_file = resource_files.get("exac_path")
    exac_lines = (
        list(get_file_handle(exac_file)) if exac_file else fetch_exac_constraint()
    )

    # Load cytobands into cytoband collection
    for genome_build, cytobands_path in cytoband_files.items():
        load_cytobands(cytobands_path, genome_build, adapter)

    for build in ("37", "38"):
        genes_key = "genes{}_path".format(build)
        if resource_files.get(genes_key):
            ensembl_gene_lines = get_file_handle(resource_files[genes_key])
        else:
            ensembl_gene_lines = fetch_ensembl_genes(build=build)

        hgnc_genes = load_hgnc_genes(
            adapter=adapter,
            ensembl_lines=ensembl_gene_lines,
            hgnc_lines=hgnc_lines,
            exac_lines=exac_lines,
            mim2gene_lines=mim2gene_lines,
            genemap_lines=genemap_lines,
            hpo_lines=hpo_gene_lines,
            build=build,
        )

        # Map every ensembl id to its gene object
        genes_by_ensembl_id = {gene["ensembl_id"]: gene for gene in hgnc_genes}

        tx_key = "transcripts{}_path".format(build)
        if resource_files.get(tx_key):
            transcript_lines = get_file_handle(resource_files[tx_key])
        else:
            transcript_lines = fetch_ensembl_transcripts(build=build)
        # Load the transcripts for this build
        load_transcripts(adapter, transcript_lines, build, genes_by_ensembl_id)

    hpo_terms_handle = (
        get_file_handle(resource_files["hpoterms_path"])
        if resource_files.get("hpoterms_path")
        else None
    )
    hpo_to_genes_handle = (
        get_file_handle(resource_files["hpo_to_genes_path"])
        if resource_files.get("hpo_to_genes_path")
        else None
    )
    # NOTE(review): this handle is opened but not handed to load_hpo below
    hpo_disease_handle = (
        get_file_handle(resource_files["hpo_disease_path"])
        if resource_files.get("hpo_disease_path")
        else None
    )

    load_hpo(
        adapter=adapter,
        disease_lines=genemap_lines,
        hpo_lines=hpo_terms_handle,
        hpo_gene_lines=hpo_to_genes_handle,
    )

    # Demo instances additionally get a gene panel and a case
    if demo:
        demo_panel = parse_gene_panel(
            path=panel_path,
            institute="cust000",
            panel_id="panel1",
            version=1.0,
            display_name="Test panel",
        )
        adapter.load_panel(demo_panel)

        raw_case = yaml.load(get_file_handle(load_path), Loader=yaml.FullLoader)
        adapter.load_case(parse_case_data(config=raw_case))

    LOG.info("Creating indexes")
    adapter.load_indexes()
    LOG.info("Scout instance setup successful")
示例#12
0
def demo(context):
    """Set up a scout demo instance.

    WARNING: every collection in the database is dropped first.

    After that an institute and an admin user are added, the reduced build 37
    gene resources and the HPO resources are loaded, and finally a gene panel
    and a case are loaded through the adapter.

    Args:
        context: object whose ``obj`` mapping provides 'institute_name',
            'user_name', 'user_mail', 'adapter' and 'mongodb'
    """
    LOG.info("Running scout setup demo")
    institute_name = context.obj['institute_name']
    user_name = context.obj['user_name']
    user_mail = context.obj['user_mail']

    adapter = context.obj['adapter']

    LOG.info("Setting up database %s", context.obj['mongodb'])
    LOG.info("Deleting previous database")
    for collection_name in adapter.db.collection_names():
        LOG.info("Deleting collection %s", collection_name)
        adapter.db.drop_collection(collection_name)
    LOG.info("Database deleted")

    # Build a institute with id institute_name
    institute_obj = build_institute(
        internal_id=institute_name,
        display_name=institute_name,
        sanger_recipients=[user_mail]
    )

    # Add the institute to database
    adapter.add_institute(institute_obj)

    # Build a user obj
    user_obj = dict(
        _id=user_mail,
        email=user_mail,
        name=user_name,
        roles=['admin'],
        institutes=[institute_name]
    )

    adapter.add_user(user_obj)

    # Open the reduced resource files. Only build 37 genes are loaded below,
    # so no build 38 handles are opened (the previous version opened six
    # that were never read or closed).
    LOG.info("Loading hgnc genes from %s", hgnc_reduced_path)
    hgnc_handle = get_file_handle(hgnc_reduced_path)

    LOG.info("Loading exac genes from %s", exac_reduced_path)
    exac_handle = get_file_handle(exac_reduced_path)

    LOG.info("Loading mim2gene info from %s", mim2gene_reduced_path)
    mim2gene_handle = get_file_handle(mim2gene_reduced_path)

    LOG.info("Loading genemap info from %s", genemap2_reduced_path)
    genemap_handle = get_file_handle(genemap2_reduced_path)

    LOG.info("Loading hpo gene info from %s", hpogenes_reduced_path)
    hpo_genes_handle = get_file_handle(hpogenes_reduced_path)
    hpo_to_genes_handle = get_file_handle(hpo_to_genes_reduced_path)
    LOG.info("Loading hpo disease info from %s", hpo_phenotype_to_terms_reduced_path)
    hpo_disease_handle = get_file_handle(hpo_phenotype_to_terms_reduced_path)
    LOG.info("Loading hpo terms from %s", hpoterms_reduced_path)
    hpo_terms_handle = get_file_handle(hpoterms_reduced_path)

    # Second, independent handle on the genemap file for the disease terms
    LOG.info("Loading omim disease info from %s", genemap2_reduced_path)
    disease_handle = get_file_handle(genemap2_reduced_path)

    LOG.info("Loading transcripts build 37 info from %s", transcripts37_reduced_path)
    transcripts37_handle = get_file_handle(transcripts37_reduced_path)

    genes37 = link_genes(
        ensembl_lines=transcripts37_handle,
        hgnc_lines=hgnc_handle,
        exac_lines=exac_handle,
        mim2gene_lines=mim2gene_handle,
        genemap_lines=genemap_handle,
        hpo_lines=hpo_genes_handle,
    )

    load_hgnc_genes(adapter, genes37, build='37')

    load_hpo(
        adapter=adapter,
        hpo_lines=hpo_terms_handle,
        hpo_gene_lines=hpo_to_genes_handle,
        disease_lines=disease_handle,
        hpo_disease_lines=hpo_disease_handle
    )

    adapter.load_panel(
        path=panel_path,
        institute='cust000',
        panel_id='panel1',
        date=datetime.datetime.now(),
        panel_type='clinical',
        version=1.0,
        display_name='Test panel'
    )

    case_handle = get_file_handle(load_path)
    # safe_load replaces yaml.load() without a Loader, which is deprecated
    # and rejected by newer PyYAML releases; it only builds plain data types.
    case_data = yaml.safe_load(case_handle)

    adapter.load_case(case_data)

    LOG.info("Creating indexes")

    adapter.load_indexes()

    LOG.info("Scout demo instance setup successful")
示例#13
0
def database(context, institute_name, user_name, user_mail, api_key):
    """Set up a scout database.

    Every collection whose name does not start with 'system' is dropped. Then
    an institute and an admin user are added, genes for builds 37 and 38 are
    linked and loaded, HPO information is loaded and the indexes are created.

    Args:
        context: object exposing ``obj`` (mapping with the defaults and the
            adapter) and ``abort()``
        institute_name(str): falls back to context.obj['institute_name']
        user_name(str): falls back to context.obj['user_name']
        user_mail(str): falls back to context.obj['user_mail']
        api_key(str): falls back to context.obj.get('omim_api_key')
    """
    LOG.info("Running scout setup database")

    # Without an OMIM api key context.abort() is called
    api_key = api_key or context.obj.get('omim_api_key')
    if not api_key:
        LOG.warning("Please provide a omim api key to load the omim gene panel")
        context.abort()

    # Fetch the omim information; any failure is logged before aborting
    try:
        mim_files = fetch_mim_files(api_key, mim2genes=True, morbidmap=True, genemap2=True)
    except Exception as err:
        LOG.warning(err)
        context.abort()

    # Explicit arguments take precedence over the values kept on the context
    institute_name = institute_name or context.obj['institute_name']
    user_name = user_name or context.obj['user_name']
    user_mail = user_mail or context.obj['user_mail']

    adapter = context.obj['adapter']

    LOG.info("Deleting previous database")
    for name in adapter.db.collection_names():
        if name.startswith('system'):
            continue
        LOG.info("Deleting collection %s", name)
        adapter.db.drop_collection(name)
    LOG.info("Database deleted")

    LOG.info("Setting up database %s", context.obj['mongodb'])

    # Institute with institute_name as both id and display name
    new_institute = build_institute(
        internal_id=institute_name,
        display_name=institute_name,
        sanger_recipients=[user_mail]
    )
    adapter.add_institute(new_institute)

    # Admin user belonging to that institute
    admin_user = {
        '_id': user_mail,
        'email': user_mail,
        'name': user_name,
        'roles': ['admin'],
        'institutes': [institute_name],
    }
    adapter.add_user(admin_user)

    # Fetch the genes to hpo information
    hpo_genes = fetch_hpo_genes()

    # Link and load the genes of one build at a time, build 37 first
    transcript_paths = (('37', transcripts37_path), ('38', transcripts38_path))
    for build, transcripts_path in transcript_paths:
        linked_genes = link_genes(
            ensembl_lines=get_file_handle(transcripts_path),
            hgnc_lines=get_file_handle(hgnc_path),
            exac_lines=get_file_handle(exac_path),
            mim2gene_lines=mim_files['mim2genes'],
            genemap_lines=mim_files['genemap2'],
            hpo_lines=hpo_genes,
        )
        load_hgnc_genes(adapter, linked_genes, build=build)

    load_hpo(adapter=adapter, disease_lines=mim_files['genemap2'])

    LOG.info("Creating indexes")
    adapter.load_indexes()
    LOG.info("Scout instance setup successful")
示例#14
0
def setup_scout(adapter,
                institute_id='cust000',
                user_name='Clark Kent',
                user_mail='*****@*****.**',
                api_key=None,
                demo=False):
    """Set up a scout instance.

    WARNING: every collection whose name does not start with 'system' is
    dropped before anything is loaded.

    An institute and an admin user are added, then genes and transcripts for
    builds 37 and 38 and the HPO information are loaded. With ``demo`` the
    reduced resource files are read instead of fetching the resources, and a
    gene panel and a case are loaded at the end.

    Args:
        adapter: database adapter used for all inserts and loads
        institute_id(str): used as both internal id and display name
        user_name(str): name of the admin user
        user_mail(str): used as user id, user email and sanger recipient
        api_key(str): passed to fetch_mim_files when not in demo mode
        demo(bool): read the reduced demo files instead of fetching

    Raises:
        Exception: whatever fetch_mim_files raises is logged and re-raised
    """
    ########################## Delete previous information ##########################
    LOG.info("Deleting previous database")
    for collection_name in adapter.db.collection_names():
        if not collection_name.startswith('system'):
            LOG.info("Deleting collection %s", collection_name)
            adapter.db.drop_collection(collection_name)
    LOG.info("Database deleted")

    ########################## Add a institute ##########################
    #####################################################################
    # Build a institute with id institute_id
    institute_obj = build_institute(internal_id=institute_id,
                                    display_name=institute_id,
                                    sanger_recipients=[user_mail])

    # Add the institute to database
    adapter.add_institute(institute_obj)

    ########################## Add a User ###############################
    #####################################################################
    # Build a user obj
    user_obj = dict(_id=user_mail,
                    email=user_mail,
                    name=user_name,
                    roles=['admin'],
                    institutes=[institute_id])

    adapter.add_user(user_obj)

    ### Get the mim information ###

    if not demo:
        # Fetch the mim files
        try:
            mim_files = fetch_mim_files(api_key,
                                        mim2genes=True,
                                        morbidmap=True,
                                        genemap2=True)
        except Exception as err:
            LOG.warning(err)
            # No `context` exists in this function, so the previous
            # context.abort() raised NameError and hid the real failure;
            # re-raise the original exception instead.
            raise
        mim2gene_lines = mim_files['mim2genes']
        genemap_lines = mim_files['genemap2']

        # Fetch the genes to hpo information
        hpo_gene_lines = fetch_hpo_genes()
        # Fetch the latest version of the hgnc information
        hgnc_lines = fetch_hgnc()
        # Fetch the latest exac pli score information
        exac_lines = fetch_exac_constraint()

    else:
        # Read the reduced files into lists so both builds can iterate them
        mim2gene_lines = list(get_file_handle(mim2gene_reduced_path))
        genemap_lines = list(get_file_handle(genemap2_reduced_path))

        # Reduced genes to hpo information
        hpo_gene_lines = list(get_file_handle(hpogenes_reduced_path))
        # Reduced hgnc information
        hgnc_lines = list(get_file_handle(hgnc_reduced_path))
        # Reduced exac pli score information
        exac_lines = list(get_file_handle(exac_reduced_path))

    builds = ['37', '38']
    ################## Load Genes and transcripts #######################
    #####################################################################
    for build in builds:
        # Fetch the ensembl information
        # NOTE(review): demo mode reads the build 37 reduced files for both
        # builds - confirm this is intended
        if not demo:
            ensembl_genes = fetch_ensembl_genes(build=build)
        else:
            ensembl_genes = get_file_handle(genes37_reduced_path)
        # load the genes
        hgnc_genes = load_hgnc_genes(
            adapter=adapter,
            ensembl_lines=ensembl_genes,
            hgnc_lines=hgnc_lines,
            exac_lines=exac_lines,
            mim2gene_lines=mim2gene_lines,
            genemap_lines=genemap_lines,
            hpo_lines=hpo_gene_lines,
            build=build,
        )

        # Create a map from ensembl ids to gene objects
        ensembl_genes = {
            gene_obj['ensembl_id']: gene_obj for gene_obj in hgnc_genes
        }

        # Fetch the transcripts from ensembl
        if not demo:
            ensembl_transcripts = fetch_ensembl_transcripts(build=build)
        else:
            ensembl_transcripts = get_file_handle(transcripts37_reduced_path)
        # Load the transcripts for a certain build
        load_transcripts(adapter, ensembl_transcripts, build, ensembl_genes)

    # The HPO handles stay None unless running in demo mode
    hpo_terms_handle = None
    hpo_to_genes_handle = None
    hpo_disease_handle = None
    if demo:
        hpo_terms_handle = get_file_handle(hpoterms_reduced_path)
        hpo_to_genes_handle = get_file_handle(hpo_to_genes_reduced_path)
        hpo_disease_handle = get_file_handle(
            hpo_phenotype_to_terms_reduced_path)

    load_hpo(adapter=adapter,
             hpo_lines=hpo_terms_handle,
             hpo_gene_lines=hpo_to_genes_handle,
             disease_lines=genemap_lines,
             hpo_disease_lines=hpo_disease_handle)

    # If demo we load a gene panel and some case information
    if demo:
        parsed_panel = parse_gene_panel(path=panel_path,
                                        institute='cust000',
                                        panel_id='panel1',
                                        version=1.0,
                                        display_name='Test panel')
        adapter.load_panel(parsed_panel)

        case_handle = get_file_handle(load_path)
        # safe_load replaces yaml.load() without a Loader, which is
        # deprecated and rejected by newer PyYAML releases.
        case_data = yaml.safe_load(case_handle)

        adapter.load_case(case_data)

    LOG.info("Creating indexes")
    adapter.load_indexes()
    LOG.info("Scout instance setup successful")
示例#15
0
def database(context, institute_name, user_name, user_mail):
    """Set up a scout database from the resources kept on the context.

    All collections are dropped. Then an institute and an admin user are
    added, genes for builds 37 and 38 are linked and loaded, the HPO
    information is loaded and an index is created on the hgnc collection.

    Args:
        context: object whose ``obj`` mapping holds the adapter, the defaults
            and the resource handles
        institute_name(str): falls back to context.obj['institute_name']
        user_name(str): falls back to context.obj['user_name']
        user_mail(str): falls back to context.obj['user_mail']
    """
    log.info("Running scout setup database")

    settings = context.obj
    # Explicit arguments take precedence over the stored defaults
    institute_name = institute_name or settings['institute_name']
    user_name = user_name or settings['user_name']
    user_mail = user_mail or settings['user_mail']

    adapter = settings['adapter']

    log.info("Setting up database %s", settings['mongodb'])
    log.info("Deleting previous database")
    for name in adapter.db.collection_names():
        log.info("Deleting collection %s", name)
        adapter.db.drop_collection(name)
    log.info("Database deleted")

    # Institute with institute_name as both id and display name
    new_institute = build_institute(
        internal_id=institute_name,
        display_name=institute_name,
        sanger_recipients=[user_mail]
    )
    adapter.add_institute(new_institute)

    # Admin user belonging to that institute
    admin_user = {
        '_id': user_mail,
        'email': user_mail,
        'name': user_name,
        'roles': ['admin'],
        'institutes': [institute_name],
    }
    adapter.add_user(admin_user)

    # Pick up the gene resources before anything is loaded
    hgnc_lines = settings['hgnc']
    transcripts37_lines = settings['transcripts37']
    transcripts38_lines = settings['transcripts38']
    exac_lines = settings['exac']
    hpo_gene_lines = settings['hpogenes']

    mim2gene_lines = settings['mim2gene']
    genemap_lines = settings['genemap2']

    # Build 37
    linked_37 = link_genes(
        ensembl_lines=transcripts37_lines,
        hgnc_lines=hgnc_lines,
        exac_lines=exac_lines,
        mim2gene_lines=mim2gene_lines,
        genemap_lines=genemap_lines,
        hpo_lines=hpo_gene_lines,
    )
    load_hgnc_genes(adapter, linked_37, build='37')

    # Build 38 uses its own set of resources from the context
    linked_38 = link_genes(
        ensembl_lines=transcripts38_lines,
        hgnc_lines=settings['hgnc38'],
        exac_lines=settings['exac38'],
        mim2gene_lines=settings['mim2gene38'],
        genemap_lines=settings['genemap2_38'],
        hpo_lines=settings['hpogenes_38'],
    )
    load_hgnc_genes(adapter, linked_38, build='38')

    hpo_term_lines = settings['hpo_terms']
    disease_lines = settings['disease_terms']
    hpo_disease_lines = settings['hpodiseases']

    load_hpo(
        adapter=adapter,
        hpo_lines=hpo_term_lines,
        disease_lines=disease_lines,
        hpo_disease_lines=hpo_disease_lines
    )

    log.info("Creating indexes")

    # Compound index on build and chromosome
    adapter.hgnc_collection.create_index([('build', pymongo.ASCENDING),
                                          ('chromosome', pymongo.ASCENDING)])
    log.info("hgnc gene index created")

    log.info("Scout instance setup successful")
示例#16
0
def demo(context):
    """Set up a scout demo instance.

    WARNING: every collection in the database is dropped first.

    After that an institute and an admin user are added, build 37 genes and
    the HPO information are loaded from the handles kept on the context, a
    gene panel is loaded and a case is loaded with load_scout.

    Args:
        context: object whose ``obj`` mapping holds the adapter, the defaults
            and the resource handles
    """
    log.info("Running scout setup demo")
    institute_name = context.obj['institute_name']
    user_name = context.obj['user_name']
    user_mail = context.obj['user_mail']

    adapter = context.obj['adapter']

    log.info("Setting up database %s", context.obj['mongodb'])
    log.info("Deleting previous database")
    for collection_name in adapter.db.collection_names():
        log.info("Deleting collection %s", collection_name)
        adapter.db.drop_collection(collection_name)
    log.info("Database deleted")

    # Build a institute with id institute_name
    institute_obj = build_institute(
        internal_id=institute_name,
        display_name=institute_name,
        sanger_recipients=[user_mail]
    )

    # Add the institute to database
    adapter.add_institute(institute_obj)

    # Build a user obj
    user_obj = dict(
        _id=user_mail,
        email=user_mail,
        name=user_name,
        roles=['admin'],
        institutes=[institute_name]
    )

    adapter.add_user(user_obj)

    # Load the genes and transcripts (build 37 only)
    hgnc_handle = context.obj['hgnc']
    transcripts37_handle = context.obj['transcripts37']
    exac_handle = context.obj['exac']
    hpo_genes_handle = context.obj['hpogenes']
    mim2gene_handle = context.obj['mim2gene']
    genemap_handle = context.obj['genemap2']

    genes37 = link_genes(
        ensembl_lines=transcripts37_handle,
        hgnc_lines=hgnc_handle,
        exac_lines=exac_handle,
        mim2gene_lines=mim2gene_handle,
        genemap_lines=genemap_handle,
        hpo_lines=hpo_genes_handle,
    )

    load_hgnc_genes(adapter, genes37, build='37')

    hpo_terms_handle = context.obj['hpo_terms']
    disease_handle = context.obj['disease_terms']
    hpo_disease_handle = context.obj['hpodiseases']

    load_hpo(
        adapter=adapter,
        hpo_lines=hpo_terms_handle,
        disease_lines=disease_handle,
        hpo_disease_lines=hpo_disease_handle
    )

    panel_info = {
        'date': datetime.datetime.now(),
        'file': panel_path,
        'type': 'clinical',
        'institute': 'cust000',
        'version': '1.0',
        'panel_name': 'panel1',
        'full_name': 'Test panel'
    }

    # NOTE(review): panel_obj is never read afterwards; the call is kept
    # because build_panel receives the adapter and may check the panel
    # against the database - confirm before removing
    parsed_panel = parse_gene_panel(panel_info)
    panel_obj = build_panel(parsed_panel, adapter)
    load_panel(
        adapter=adapter,
        panel_info=panel_info
    )

    case_handle = get_file_handle(load_path)
    # safe_load replaces yaml.load() without a Loader, which is deprecated
    # and rejected by newer PyYAML releases; it only builds plain data types.
    case_data = yaml.safe_load(case_handle)

    # Point the case at the demo variant and pedigree files
    case_data['vcf_snv'] = clinical_snv_path
    case_data['vcf_sv'] = clinical_sv_path
    case_data['vcf_snv_research'] = research_snv_path
    case_data['vcf_sv_research'] = research_sv_path
    case_data['madeline'] = madeline_path

    load_scout(adapter, case_data)

    log.info("Creating indexes")

    adapter.hgnc_collection.create_index([('build', pymongo.ASCENDING),
                                          ('chromosome', pymongo.ASCENDING)])
    log.info("hgnc gene index created")

    log.info("Scout demo instance setup successful")
示例#17
0
def setup_scout(adapter, institute_id='cust000', user_name='Clark Kent',
                user_mail='*****@*****.**', api_key=None, demo=False):
    """Set up a scout instance.

    WARNING: every collection whose name does not start with 'system' is
    dropped before anything is loaded.

    An institute and an admin user are added, then genes and transcripts for
    builds 37 and 38 and the HPO information are loaded. With ``demo`` the
    reduced resource files are read instead of fetching the resources, and a
    gene panel and a case are loaded at the end.

    Args:
        adapter: database adapter used for all inserts and loads
        institute_id(str): used as both internal id and display name
        user_name(str): name of the admin user
        user_mail(str): used as user id, user email and sanger recipient
        api_key(str): passed to fetch_mim_files when not in demo mode
        demo(bool): read the reduced demo files instead of fetching
    """
    # ---- Delete previous information ----
    LOG.info("Deleting previous database")
    for name in adapter.db.collection_names():
        if name.startswith('system'):
            continue
        LOG.info("Deleting collection %s", name)
        adapter.db.drop_collection(name)
    LOG.info("Database deleted")

    # ---- Add an institute whose display name equals its id ----
    new_institute = build_institute(
        internal_id=institute_id,
        display_name=institute_id,
        sanger_recipients=[user_mail]
    )
    adapter.add_institute(new_institute)

    # ---- Add an admin user for that institute ----
    admin_user = {
        '_id': user_mail,
        'email': user_mail,
        'name': user_name,
        'roles': ['admin'],
        'institutes': [institute_id],
    }
    adapter.add_user(admin_user)

    # ---- Collect the gene resources ----
    if demo:
        # Reduced files are read fully into lists
        mim2gene_lines = list(get_file_handle(mim2gene_reduced_path))
        genemap_lines = list(get_file_handle(genemap2_reduced_path))
        hpo_gene_lines = list(get_file_handle(hpogenes_reduced_path))
        hgnc_lines = list(get_file_handle(hgnc_reduced_path))
        exac_lines = list(get_file_handle(exac_reduced_path))
    else:
        # Fetch the mim files; a failure is logged and passed on
        try:
            mim_files = fetch_mim_files(api_key, mim2genes=True, morbidmap=True, genemap2=True)
        except Exception as err:
            LOG.warning(err)
            raise err
        mim2gene_lines = mim_files['mim2genes']
        genemap_lines = mim_files['genemap2']

        # Genes to hpo, hgnc and exac pli score information
        hpo_gene_lines = fetch_hpo_genes()
        hgnc_lines = fetch_hgnc()
        exac_lines = fetch_exac_constraint()

    # ---- Load genes and transcripts, one build at a time ----
    for build in ('37', '38'):
        ensembl_gene_lines = (
            get_file_handle(genes37_reduced_path)
            if demo
            else fetch_ensembl_genes(build=build)
        )
        hgnc_genes = load_hgnc_genes(
            adapter=adapter,
            ensembl_lines=ensembl_gene_lines,
            hgnc_lines=hgnc_lines,
            exac_lines=exac_lines,
            mim2gene_lines=mim2gene_lines,
            genemap_lines=genemap_lines,
            hpo_lines=hpo_gene_lines,
            build=build,
        )

        # Map every ensembl id to its gene object
        genes_by_ensembl_id = {gene['ensembl_id']: gene for gene in hgnc_genes}

        transcript_lines = (
            get_file_handle(transcripts37_reduced_path)
            if demo
            else fetch_ensembl_transcripts(build=build)
        )
        load_transcripts(adapter, transcript_lines, build, genes_by_ensembl_id)

    # ---- Load the HPO information; handles stay None outside demo mode ----
    hpo_terms_handle = hpo_to_genes_handle = hpo_disease_handle = None
    if demo:
        hpo_terms_handle = get_file_handle(hpoterms_reduced_path)
        hpo_to_genes_handle = get_file_handle(hpo_to_genes_reduced_path)
        hpo_disease_handle = get_file_handle(hpo_phenotype_to_terms_reduced_path)

    load_hpo(
        adapter=adapter,
        hpo_lines=hpo_terms_handle,
        hpo_gene_lines=hpo_to_genes_handle,
        disease_lines=genemap_lines,
        hpo_disease_lines=hpo_disease_handle
    )

    # ---- Demo instances additionally get a gene panel and a case ----
    if demo:
        demo_panel = parse_gene_panel(
            path=panel_path,
            institute='cust000',
            panel_id='panel1',
            version=1.0,
            display_name='Test panel'
        )
        adapter.load_panel(demo_panel)

        demo_case = yaml.load(get_file_handle(load_path), Loader=yaml.FullLoader)
        adapter.load_case(demo_case)

    LOG.info("Creating indexes")
    adapter.load_indexes()
    LOG.info("Scout instance setup successful")