def real_gene_database(
    request,
    real_institute_database,
    genes37_handle,
    hgnc_handle,
    exac_handle,
    mim2gene_handle,
    genemap_handle,
    hpo_genes_handle,
):
    """Load build 37 genes into the institute adapter and index them.

    The gene resources are handed to ``load_hgnc_genes`` together with the
    ``real_institute_database`` adapter, after which a compound
    (build, hgnc_symbol) index is created on the hgnc collection.

    Returns:
        The same adapter object that was passed in as
        ``real_institute_database``.
    """
    adapter = real_institute_database

    gene_resources = {
        "ensembl_lines": genes37_handle,
        "hgnc_lines": hgnc_handle,
        "exac_lines": exac_handle,
        "mim2gene_lines": mim2gene_handle,
        "genemap_lines": genemap_handle,
        "hpo_lines": hpo_genes_handle,
    }
    load_hgnc_genes(adapter=adapter, build="37", **gene_resources)

    LOG.info("Creating index on hgnc collection")
    symbol_index = [
        ("build", pymongo.ASCENDING),
        ("hgnc_symbol", pymongo.ASCENDING),
    ]
    adapter.hgnc_collection.create_index(symbol_index)
    LOG.info("Index done")

    return adapter
def real_gene_database(request, real_institute_database, genes):
    """Load ``genes`` into the institute adapter and index the hgnc collection.

    Returns:
        The adapter received as ``real_institute_database``, after
        ``load_hgnc_genes`` has been called on it and a compound
        (build, hgnc_symbol) index has been created.
    """
    adapter = real_institute_database
    load_hgnc_genes(adapter, genes)

    logger.info("Creating index on hgnc collection")
    index_spec = [
        ('build', pymongo.ASCENDING),
        ('hgnc_symbol', pymongo.ASCENDING),
    ]
    adapter.hgnc_collection.create_index(index_spec)
    logger.info("Index done")

    return adapter
def genes(context, build, api_key):
    """Drop and reload the hgnc genes for one build, or for 37 and 38.

    An OMIM api key is required, either as ``api_key`` or under
    ``omim_api_key`` in ``context.obj``; without one the command aborts.
    When ``build`` is falsy both '37' and '38' are loaded.
    """
    adapter = context.obj['adapter']

    # The OMIM files can only be fetched with an api key
    omim_key = api_key or context.obj.get('omim_api_key')
    if not omim_key:
        LOG.warning("Please provide a omim api key to load the omim gene panel")
        context.abort()

    try:
        mim_files = fetch_mim_files(
            omim_key, mim2genes=True, morbidmap=True, genemap2=True
        )
    except Exception as err:
        LOG.warning(err)
        context.abort()

    LOG.warning("Dropping all gene information")
    adapter.drop_genes(build)
    LOG.info("Genes dropped")

    hpo_genes = fetch_hpo_genes()

    builds = [build] if build else ['37', '38']

    for genome_build in builds:
        LOG.info("Loading hgnc file from {0}".format(hgnc_path))
        hgnc_handle = get_file_handle(hgnc_path)

        # Pick the ensembl file that matches the build, if any
        if genome_build == '37':
            ensembl_handle = get_file_handle(transcripts37_path)
        elif genome_build == '38':
            ensembl_handle = get_file_handle(transcripts38_path)
        else:
            ensembl_handle = None

        LOG.info("Loading exac gene file from {0}".format(exac_path))
        exac_handle = get_file_handle(exac_path)

        linked_genes = link_genes(
            ensembl_lines=ensembl_handle,
            hgnc_lines=hgnc_handle,
            exac_lines=exac_handle,
            mim2gene_lines=mim_files['mim2genes'],
            genemap_lines=mim_files['genemap2'],
            hpo_lines=hpo_genes,
        )
        load_hgnc_genes(adapter=adapter, genes=linked_genes, build=genome_build)
def genes(ctx, update, build):
    """Load hgnc genes for ``build`` from the bundled resource files.

    If genes are already present for the build the command aborts, unless
    ``update`` is set, in which case the existing genes are dropped first.
    """
    adapter = ctx.obj['adapter']

    # Only touch existing gene data when explicitly asked to update
    genes_present = adapter.nr_genes(build=build) > 0
    if genes_present and update:
        logger.warning("Dropping all gene information")
        adapter.drop_genes()
        logger.info("Genes dropped")
    elif genes_present:
        logger.info("Genes are already loaded")
        logger.info("If you wish to update genes use '--update'")
        ctx.abort()

    logger.info("Loading hgnc file from {0}".format(hgnc_path))
    hgnc_lines = get_file_handle(hgnc_path)

    if build != '37':
        ensembl_lines = get_file_handle(transcripts38_path)
    else:
        logger.info(
            "Loading ensembl transcript file from {0}".format(transcripts37_path)
        )
        ensembl_lines = get_file_handle(transcripts37_path)

    logger.info("Loading exac gene file from {0}".format(exac_path))
    exac_lines = get_file_handle(exac_path)

    logger.info(
        "Loading mim information from files {0}, {1}".format(
            mim2gene_path, genemap2_path
        )
    )
    mim2gene_lines = get_file_handle(mim2gene_path)
    genemap_lines = get_file_handle(genemap2_path)

    hpo_lines = get_file_handle(hpogenes_path)

    linked_genes = link_genes(
        ensembl_lines=ensembl_lines,
        hgnc_lines=hgnc_lines,
        exac_lines=exac_lines,
        mim2gene_lines=mim2gene_lines,
        genemap_lines=genemap_lines,
        hpo_lines=hpo_lines,
    )
    load_hgnc_genes(adapter=adapter, genes=linked_genes, build=build)
def real_gene_database(
    real_institute_database,
    genes37_handle,
    hgnc_handle,
    exac_handle,
    mim2gene_handle,
    genemap_handle,
    hpo_genes_handle,
):
    """Load build 37 genes into the institute adapter and return it.

    All gene resources are forwarded unchanged to ``load_hgnc_genes``.

    Returns:
        The adapter received as ``real_institute_database``.
    """
    gene_resources = {
        "ensembl_lines": genes37_handle,
        "hgnc_lines": hgnc_handle,
        "exac_lines": exac_handle,
        "mim2gene_lines": mim2gene_handle,
        "genemap_lines": genemap_handle,
        "hpo_lines": hpo_genes_handle,
    }
    load_hgnc_genes(adapter=real_institute_database, build="37", **gene_resources)
    return real_institute_database
def gene_database(request, institute_database, genes):
    """Load build 37 genes and transcripts into the institute adapter.

    Genes are loaded first, then the reduced build 37 transcripts file.
    Compound indexes are created on the hgnc collection
    (build, hgnc_symbol) and the transcript collection (build, hgnc_id).

    Returns:
        The adapter received as ``institute_database``.
    """
    adapter = institute_database
    load_hgnc_genes(adapter=adapter, genes=genes, build='37')

    LOG.info("Creating index on hgnc collection")
    gene_index = [
        ('build', pymongo.ASCENDING),
        ('hgnc_symbol', pymongo.ASCENDING),
    ]
    adapter.hgnc_collection.create_index(gene_index)

    load_transcripts(
        adapter, get_file_handle(transcripts37_reduced_path), build='37'
    )

    transcript_index = [
        ('build', pymongo.ASCENDING),
        ('hgnc_id', pymongo.ASCENDING),
    ]
    adapter.transcript_collection.create_index(transcript_index)
    LOG.info("Index done")

    return adapter
def setup_scout(
    adapter,
    institute_id="cust000",
    user_name="Clark Kent",
    user_mail="*****@*****.**",
    api_key=None,
    demo=False,
    resource_files=None,
):
    """Set up a working scout instance.

    WARNING: If the instance is populated, all non-system collections are
    deleted before anything is loaded.

    An institute and an admin user are inserted, followed by the resources
    scout needs to work properly:

    Genes: Scout uses HGNC as the source for gene identifiers and ensembl as
    source for coordinates. Additional information on disease connections
    for genes is taken from OMIM. The link between hpo terms and genes is
    taken from HPO.

    For more details check the documentation.

    Args:
        adapter: database adapter that everything is loaded into.
        institute_id: internal id and display name of the created institute.
        user_name: name of the created admin user.
        user_mail: e-mail of the admin user, also used as the user ``_id``
            and as sanger recipient of the institute.
        api_key: OMIM api key. Only used when the OMIM files are not given
            through ``resource_files``; without either, no OMIM lines are
            passed on (they stay ``None``).
        demo: if true, ``demo_files`` replaces ``resource_files`` and a demo
            gene panel and case are loaded at the end.
        resource_files: optional dict of paths to local resource files
            (keys such as ``hgnc_path``, ``exac_path``, ``genes37_path``).
            Resources without a path are fetched with the ``fetch_*``
            helpers instead.

    Raises:
        Exception: whatever ``fetch_mim_files`` raises is logged and
            re-raised.
    """
    LOG.info("Check if there was a database, delete if existing")
    existing_database = False
    for collection_name in adapter.db.list_collection_names():
        if collection_name.startswith("system"):
            continue
        LOG.info("Deleting collection %s", collection_name)
        adapter.db.drop_collection(collection_name)
        existing_database = True

    if existing_database:
        LOG.info("Database deleted")

    institute_obj = build_institute(
        internal_id=institute_id,
        display_name=institute_id,
        sanger_recipients=[user_mail],
    )
    adapter.add_institute(institute_obj)

    user_obj = dict(
        _id=user_mail,
        email=user_mail,
        name=user_name,
        roles=["admin"],
        institutes=[institute_id],
    )
    adapter.add_user(user_obj)

    resource_files = resource_files or {}
    if demo:
        resource_files = demo_files

    def _read_or_fetch(path_key, fetcher):
        """Return the lines of a local resource file, or fetch them if no path is given."""
        path = resource_files.get(path_key)
        if path:
            # Materialised as a list since the lines are reused for both builds
            return list(get_file_handle(path))
        return fetcher()

    # OMIM information: local files take precedence over the api
    mim2gene_lines = None
    genemap_lines = None
    mim2gene_path = resource_files.get("mim2gene_path")
    genemap_path = resource_files.get("genemap_path")
    if genemap_path and mim2gene_path:
        mim2gene_lines = list(get_file_handle(mim2gene_path))
        genemap_lines = list(get_file_handle(genemap_path))

    if genemap_lines is None and api_key:
        try:
            mim_files = fetch_mim_files(api_key, mim2genes=True, genemap2=True)
        except Exception as err:
            LOG.warning(err)
            raise
        mim2gene_lines = mim_files["mim2genes"]
        genemap_lines = mim_files["genemap2"]

    hpo_gene_lines = _read_or_fetch("hpogenes_path", fetch_genes_to_hpo_to_disease)
    hgnc_lines = _read_or_fetch("hgnc_path", fetch_hgnc)
    exac_lines = _read_or_fetch("exac_path", fetch_exac_constraint)

    # Load cytobands into cytoband collection
    for genome_build, cytobands_path in cytoband_files.items():
        load_cytobands(cytobands_path, genome_build, adapter)

    for build in ("37", "38"):
        genes_path = "genes{}_path".format(build)
        if resource_files.get(genes_path):
            ensembl_gene_lines = get_file_handle(resource_files[genes_path])
        else:
            ensembl_gene_lines = fetch_ensembl_genes(build=build)

        hgnc_genes = load_hgnc_genes(
            adapter=adapter,
            ensembl_lines=ensembl_gene_lines,
            hgnc_lines=hgnc_lines,
            exac_lines=exac_lines,
            mim2gene_lines=mim2gene_lines,
            genemap_lines=genemap_lines,
            hpo_lines=hpo_gene_lines,
            build=build,
        )

        # Map ensembl ids to gene objects; the transcripts are linked via this
        genes_by_ensembl_id = {
            gene_obj["ensembl_id"]: gene_obj for gene_obj in hgnc_genes
        }

        tx_path = "transcripts{}_path".format(build)
        if resource_files.get(tx_path):
            ensembl_transcripts = get_file_handle(resource_files[tx_path])
        else:
            ensembl_transcripts = fetch_ensembl_transcripts(build=build)

        # Load the transcripts for this build
        load_transcripts(adapter, ensembl_transcripts, build, genes_by_ensembl_id)

    hpo_terms_handle = None
    if resource_files.get("hpoterms_path"):
        hpo_terms_handle = get_file_handle(resource_files["hpoterms_path"])

    hpo_to_genes_handle = None
    if resource_files.get("hpo_to_genes_path"):
        hpo_to_genes_handle = get_file_handle(resource_files["hpo_to_genes_path"])

    # resource_files["hpo_disease_path"] is not consumed by the load_hpo call
    # below, so the file is deliberately not opened here.
    load_hpo(
        adapter=adapter,
        disease_lines=genemap_lines,
        hpo_lines=hpo_terms_handle,
        hpo_gene_lines=hpo_to_genes_handle,
    )

    # If demo we load a gene panel and some case information
    if demo:
        parsed_panel = parse_gene_panel(
            path=panel_path,
            institute="cust000",
            panel_id="panel1",
            version=1.0,
            display_name="Test panel",
        )
        adapter.load_panel(parsed_panel)

        case_handle = get_file_handle(load_path)
        case_data = yaml.load(case_handle, Loader=yaml.FullLoader)
        config_data = parse_case_data(config=case_data)
        adapter.load_case(config_data)

    LOG.info("Creating indexes")
    adapter.load_indexes()
    LOG.info("Scout instance setup successful")
def genes(build, api_key):
    """Drop and reload genes and transcripts for one build, or for 37 and 38.

    An OMIM api key is required, either as ``api_key`` or from the
    ``OMIM_API_KEY`` app config value.

    Raises:
        click.Abort: if no api key is available or the OMIM files cannot be
            fetched.
    """
    LOG.info("Running scout update genes")
    adapter = store

    omim_key = api_key or current_app.config.get('OMIM_API_KEY')
    if not omim_key:
        LOG.warning(
            "Please provide a omim api key to load the omim gene panel")
        raise click.Abort()

    try:
        mim_files = fetch_mim_files(
            omim_key, mim2genes=True, morbidmap=True, genemap2=True
        )
    except Exception as err:
        LOG.warning(err)
        raise click.Abort()

    LOG.warning("Dropping all gene information")
    adapter.drop_genes(build)
    LOG.info("Genes dropped")
    LOG.warning("Dropping all transcript information")
    adapter.drop_transcripts(build)
    LOG.info("transcripts dropped")

    hpo_genes = fetch_hpo_genes()

    builds = [build] if build else ['37', '38']

    # Build independent resources are fetched once and reused
    hgnc_lines = fetch_hgnc()
    exac_lines = fetch_exac_constraint()

    for genome_build in builds:
        ensembl_gene_lines = fetch_ensembl_genes(build=genome_build)

        hgnc_genes = load_hgnc_genes(
            adapter=adapter,
            ensembl_lines=ensembl_gene_lines,
            hgnc_lines=hgnc_lines,
            exac_lines=exac_lines,
            mim2gene_lines=mim_files['mim2genes'],
            genemap_lines=mim_files['genemap2'],
            hpo_lines=hpo_genes,
            build=genome_build,
        )

        # Gene objects keyed on ensembl id, used when loading transcripts
        genes_by_ensembl_id = {
            gene_obj['ensembl_id']: gene_obj for gene_obj in hgnc_genes
        }

        ensembl_transcripts = fetch_ensembl_transcripts(build=genome_build)
        load_transcripts(
            adapter, ensembl_transcripts, genome_build, genes_by_ensembl_id
        )

    adapter.update_indexes()

    LOG.info("Genes, transcripts and Exons loaded")
def genes(context, build, api_key):
    """Drop and reload genes and transcripts for one build, or for 37 and 38.

    The OMIM api key is taken from ``api_key`` or from ``omim_api_key`` in
    ``context.obj``; the command aborts when no key is available or when the
    OMIM files cannot be fetched.
    """
    LOG.info("Running scout update genes")
    adapter = context.obj['adapter']

    omim_key = api_key or context.obj.get('omim_api_key')
    if not omim_key:
        LOG.warning("Please provide a omim api key to load the omim gene panel")
        context.abort()

    try:
        mim_files = fetch_mim_files(
            omim_key, mim2genes=True, morbidmap=True, genemap2=True
        )
    except Exception as err:
        LOG.warning(err)
        context.abort()

    # Existing genes and transcripts are removed before reloading
    LOG.warning("Dropping all gene information")
    adapter.drop_genes(build)
    LOG.info("Genes dropped")
    LOG.warning("Dropping all transcript information")
    adapter.drop_transcripts(build)
    LOG.info("transcripts dropped")

    hpo_genes = fetch_hpo_genes()

    if not build:
        builds = ['37', '38']
    else:
        builds = [build]

    hgnc_lines = fetch_hgnc()
    exac_lines = fetch_exac_constraint()

    for current_build in builds:
        gene_lines = fetch_ensembl_genes(build=current_build)

        loaded_genes = load_hgnc_genes(
            adapter=adapter,
            ensembl_lines=gene_lines,
            hgnc_lines=hgnc_lines,
            exac_lines=exac_lines,
            mim2gene_lines=mim_files['mim2genes'],
            genemap_lines=mim_files['genemap2'],
            hpo_lines=hpo_genes,
            build=current_build,
        )

        # Lookup from ensembl id to gene object for the transcript loading
        ensembl_to_gene = {}
        for gene in loaded_genes:
            ensembl_to_gene[gene['ensembl_id']] = gene

        transcript_lines = fetch_ensembl_transcripts(build=current_build)
        load_transcripts(adapter, transcript_lines, current_build, ensembl_to_gene)

    adapter.update_indexes()

    LOG.info("Genes, transcripts and Exons loaded")
def genes(build, api_key):
    """Drop and reload genes and transcripts for one build, or for 37 and 38.

    The OMIM api key is optional here: without one (neither ``api_key`` nor
    the ``OMIM_API_KEY`` app config value) a warning is logged and the genes
    are loaded without OMIM lines.

    Raises:
        click.Abort: if a key is available but fetching the OMIM files fails.
    """
    LOG.info("Running scout update genes")
    adapter = store

    omim_key = api_key or current_app.config.get("OMIM_API_KEY")
    mim_files = {}
    if omim_key:
        try:
            mim_files = fetch_mim_files(
                omim_key, mim2genes=True, morbidmap=True, genemap2=True
            )
        except Exception as err:
            LOG.warning(err)
            raise click.Abort()
    else:
        LOG.warning("No omim api key provided, Please not that some information will be missing")

    LOG.warning("Dropping all gene information")
    adapter.drop_genes(build)
    LOG.info("Genes dropped")
    LOG.warning("Dropping all transcript information")
    adapter.drop_transcripts(build)
    LOG.info("transcripts dropped")

    hpo_genes = fetch_genes_to_hpo_to_disease()

    builds = [build] if build else ["37", "38"]

    hgnc_lines = fetch_hgnc()
    exac_lines = fetch_exac_constraint()

    # mim_files is empty without an api key, so .get() yields None below
    mim2gene_lines = mim_files.get("mim2genes")
    genemap_lines = mim_files.get("genemap2")

    for genome_build in builds:
        ensembl_gene_lines = fetch_ensembl_genes(build=genome_build)

        hgnc_genes = load_hgnc_genes(
            adapter=adapter,
            ensembl_lines=ensembl_gene_lines,
            hgnc_lines=hgnc_lines,
            exac_lines=exac_lines,
            mim2gene_lines=mim2gene_lines,
            genemap_lines=genemap_lines,
            hpo_lines=hpo_genes,
            build=genome_build,
        )

        genes_by_ensembl_id = {
            gene_obj["ensembl_id"]: gene_obj for gene_obj in hgnc_genes
        }

        # Fetch the transcripts from ensembl and link them to the genes
        ensembl_transcripts = fetch_ensembl_transcripts(build=genome_build)
        load_transcripts(
            adapter, ensembl_transcripts, genome_build, genes_by_ensembl_id
        )

    adapter.update_indexes()

    LOG.info("Genes, transcripts and Exons loaded")
def database(context, institute_name, user_name, user_mail):
    """Setup a scout database.

    Every existing collection is dropped, an institute and an admin user are
    created, and genes for build 37 and 38 plus the hpo information are
    loaded from the resources found in ``context.obj``. Arguments that are
    falsy fall back to the value with the same name in ``context.obj``.
    """
    log.info("Running scout setup database")

    settings = context.obj
    institute_name = institute_name or settings['institute_name']
    user_name = user_name or settings['user_name']
    user_mail = user_mail or settings['user_mail']

    adapter = settings['adapter']

    log.info("Setting up database %s", settings['mongodb'])
    log.info("Deleting previous database")
    for existing_collection in adapter.db.collection_names():
        log.info("Deleting collection %s", existing_collection)
        adapter.db.drop_collection(existing_collection)
    log.info("Database deleted")

    # Institute with id institute_name, owned by the admin user below
    adapter.add_institute(
        build_institute(
            internal_id=institute_name,
            display_name=institute_name,
            sanger_recipients=[user_mail],
        )
    )

    admin_user = {
        '_id': user_mail,
        'email': user_mail,
        'name': user_name,
        'roles': ['admin'],
        'institutes': [institute_name],
    }
    adapter.add_user(admin_user)

    # Resources for the genes, looked up before anything is loaded
    hgnc_lines = settings['hgnc']
    transcripts37_lines = settings['transcripts37']
    transcripts38_lines = settings['transcripts38']
    exac_lines = settings['exac']
    hpo_gene_lines = settings['hpogenes']
    mim2gene_lines = settings['mim2gene']
    genemap_lines = settings['genemap2']

    genes_build37 = link_genes(
        ensembl_lines=transcripts37_lines,
        hgnc_lines=hgnc_lines,
        exac_lines=exac_lines,
        mim2gene_lines=mim2gene_lines,
        genemap_lines=genemap_lines,
        hpo_lines=hpo_gene_lines,
    )
    load_hgnc_genes(adapter, genes_build37, build='37')

    # Build 38 uses its own set of resources from the context
    genes_build38 = link_genes(
        ensembl_lines=transcripts38_lines,
        hgnc_lines=settings['hgnc38'],
        exac_lines=settings['exac38'],
        mim2gene_lines=settings['mim2gene38'],
        genemap_lines=settings['genemap2_38'],
        hpo_lines=settings['hpogenes_38'],
    )
    load_hgnc_genes(adapter, genes_build38, build='38')

    hpo_term_lines = settings['hpo_terms']
    disease_lines = settings['disease_terms']
    hpo_disease_lines = settings['hpodiseases']
    load_hpo(
        adapter=adapter,
        hpo_lines=hpo_term_lines,
        disease_lines=disease_lines,
        hpo_disease_lines=hpo_disease_lines,
    )

    log.info("Creating indexes")
    chromosome_index = [
        ('build', pymongo.ASCENDING),
        ('chromosome', pymongo.ASCENDING),
    ]
    adapter.hgnc_collection.create_index(chromosome_index)
    log.info("hgnc gene index created")

    log.info("Scout instance setup successful")
def setup_scout(adapter, institute_id='cust000', user_name='Clark Kent',
                user_mail='*****@*****.**', api_key=None, demo=False):
    """Set up a scout instance from scratch.

    All non-system collections are dropped, an institute and an admin user
    are created, and genes, transcripts and hpo information are loaded for
    builds 37 and 38.

    Args:
        adapter: database adapter that everything is loaded into.
        institute_id: internal id and display name of the created institute.
        user_name: name of the created admin user.
        user_mail: e-mail of the admin user, also used as the user ``_id``
            and as sanger recipient of the institute.
        api_key: OMIM api key, passed to ``fetch_mim_files`` when not in
            demo mode.
        demo: if true, the reduced resource files are used instead of
            fetched resources, and a demo gene panel and case are loaded.

    Raises:
        Exception: whatever ``fetch_mim_files`` raises is logged and
            re-raised.
    """
    # ---- Delete previous information ----
    LOG.info("Deleting previous database")
    for collection_name in adapter.db.collection_names():
        if not collection_name.startswith('system'):
            LOG.info("Deleting collection %s", collection_name)
            adapter.db.drop_collection(collection_name)
    LOG.info("Database deleted")

    # ---- Add an institute ----
    institute_obj = build_institute(
        internal_id=institute_id,
        display_name=institute_id,
        sanger_recipients=[user_mail],
    )
    adapter.add_institute(institute_obj)

    # ---- Add a user ----
    user_obj = dict(
        _id=user_mail,
        email=user_mail,
        name=user_name,
        roles=['admin'],
        institutes=[institute_id],
    )
    adapter.add_user(user_obj)

    # ---- Collect the gene resources ----
    if not demo:
        try:
            mim_files = fetch_mim_files(
                api_key, mim2genes=True, morbidmap=True, genemap2=True
            )
        except Exception as err:
            LOG.warning(err)
            # There is no click context in this function, so the error is
            # propagated to the caller instead of calling context.abort().
            raise

        mim2gene_lines = mim_files['mim2genes']
        genemap_lines = mim_files['genemap2']

        # Genes to hpo information
        hpo_gene_lines = fetch_hpo_genes()
        # Latest version of the hgnc information
        hgnc_lines = fetch_hgnc()
        # Latest exac pli score information
        exac_lines = fetch_exac_constraint()
    else:
        # Materialised as lists since the lines are reused for both builds
        mim2gene_lines = list(get_file_handle(mim2gene_reduced_path))
        genemap_lines = list(get_file_handle(genemap2_reduced_path))
        hpo_gene_lines = list(get_file_handle(hpogenes_reduced_path))
        hgnc_lines = list(get_file_handle(hgnc_reduced_path))
        exac_lines = list(get_file_handle(exac_reduced_path))

    # ---- Load genes and transcripts ----
    for build in ('37', '38'):
        # The demo uses the reduced build 37 files for both builds
        if not demo:
            ensembl_gene_lines = fetch_ensembl_genes(build=build)
        else:
            ensembl_gene_lines = get_file_handle(genes37_reduced_path)

        hgnc_genes = load_hgnc_genes(
            adapter=adapter,
            ensembl_lines=ensembl_gene_lines,
            hgnc_lines=hgnc_lines,
            exac_lines=exac_lines,
            mim2gene_lines=mim2gene_lines,
            genemap_lines=genemap_lines,
            hpo_lines=hpo_gene_lines,
            build=build,
        )

        # Map ensembl ids to gene objects; the transcripts are linked via this
        genes_by_ensembl_id = {
            gene_obj['ensembl_id']: gene_obj for gene_obj in hgnc_genes
        }

        if not demo:
            ensembl_transcripts = fetch_ensembl_transcripts(build=build)
        else:
            ensembl_transcripts = get_file_handle(transcripts37_reduced_path)

        load_transcripts(adapter, ensembl_transcripts, build, genes_by_ensembl_id)

    # ---- Load hpo terms and diseases ----
    hpo_terms_handle = None
    hpo_to_genes_handle = None
    hpo_disease_handle = None
    if demo:
        hpo_terms_handle = get_file_handle(hpoterms_reduced_path)
        hpo_to_genes_handle = get_file_handle(hpo_to_genes_reduced_path)
        hpo_disease_handle = get_file_handle(hpo_phenotype_to_terms_reduced_path)

    load_hpo(
        adapter=adapter,
        hpo_lines=hpo_terms_handle,
        hpo_gene_lines=hpo_to_genes_handle,
        disease_lines=genemap_lines,
        hpo_disease_lines=hpo_disease_handle,
    )

    # If demo we load a gene panel and some case information
    if demo:
        parsed_panel = parse_gene_panel(
            path=panel_path,
            institute='cust000',
            panel_id='panel1',
            version=1.0,
            display_name='Test panel',
        )
        adapter.load_panel(parsed_panel)

        case_handle = get_file_handle(load_path)
        # safe_load: yaml.load() without an explicit Loader is rejected by
        # current PyYAML releases, and the case config is plain data.
        case_data = yaml.safe_load(case_handle)
        adapter.load_case(case_data)

    LOG.info("Creating indexes")
    adapter.load_indexes()
    LOG.info("Scout instance setup successful")
def demo(context):
    """Setup a scout demo instance.

    This instance will be populated with a case, a gene panel and some
    variants. Every existing collection is dropped first. The institute
    name, user and all resource handles are read from ``context.obj``.
    """
    log.info("Running scout setup demo")
    institute_name = context.obj['institute_name']
    user_name = context.obj['user_name']
    user_mail = context.obj['user_mail']

    adapter = context.obj['adapter']

    log.info("Setting up database %s", context.obj['mongodb'])
    log.info("Deleting previous database")
    for collection_name in adapter.db.collection_names():
        log.info("Deleting collection %s", collection_name)
        adapter.db.drop_collection(collection_name)
    log.info("Database deleted")

    # Build an institute with id institute_name and add it to the database
    institute_obj = build_institute(
        internal_id=institute_name,
        display_name=institute_name,
        sanger_recipients=[user_mail],
    )
    adapter.add_institute(institute_obj)

    # Build the admin user
    user_obj = dict(
        _id=user_mail,
        email=user_mail,
        name=user_name,
        roles=['admin'],
        institutes=[institute_name],
    )
    adapter.add_user(user_obj)

    # Load the genes; only build 37 is used for the demo
    hgnc_handle = context.obj['hgnc']
    transcripts37_handle = context.obj['transcripts37']
    exac_handle = context.obj['exac']
    hpo_genes_handle = context.obj['hpogenes']
    mim2gene_handle = context.obj['mim2gene']
    genemap_handle = context.obj['genemap2']

    genes37 = link_genes(
        ensembl_lines=transcripts37_handle,
        hgnc_lines=hgnc_handle,
        exac_lines=exac_handle,
        mim2gene_lines=mim2gene_handle,
        genemap_lines=genemap_handle,
        hpo_lines=hpo_genes_handle,
    )
    load_hgnc_genes(adapter, genes37, build='37')

    hpo_terms_handle = context.obj['hpo_terms']
    disease_handle = context.obj['disease_terms']
    hpo_disease_handle = context.obj['hpodiseases']

    load_hpo(
        adapter=adapter,
        hpo_lines=hpo_terms_handle,
        disease_lines=disease_handle,
        hpo_disease_lines=hpo_disease_handle,
    )

    panel_info = {
        'date': datetime.datetime.now(),
        'file': panel_path,
        'type': 'clinical',
        'institute': 'cust000',
        'version': '1.0',
        'panel_name': 'panel1',
        'full_name': 'Test panel',
    }

    # NOTE(review): the results of these two calls are never used; the panel
    # is loaded from panel_info below. They are kept in case they act as
    # validation of the panel -- confirm before removing.
    parsed_panel = parse_gene_panel(panel_info)
    build_panel(parsed_panel, adapter)

    load_panel(
        adapter=adapter,
        panel_info=panel_info,
    )

    case_handle = get_file_handle(load_path)
    # safe_load: yaml.load() without an explicit Loader is rejected by
    # current PyYAML releases, and the case config is plain data.
    case_data = yaml.safe_load(case_handle)

    # Point the case at the demo variant and pedigree files
    case_data['vcf_snv'] = clinical_snv_path
    case_data['vcf_sv'] = clinical_sv_path
    case_data['vcf_snv_research'] = research_snv_path
    case_data['vcf_sv_research'] = research_sv_path
    case_data['madeline'] = madeline_path

    load_scout(adapter, case_data)

    log.info("Creating indexes")
    adapter.hgnc_collection.create_index([('build', pymongo.ASCENDING),
                                          ('chromosome', pymongo.ASCENDING)])
    log.info("hgnc gene index created")

    log.info("Scout demo instance setup successful")
def demo(context):
    """Setup a scout demo instance.

    This instance will be populated with a case, a gene panel and some
    variants. Every existing collection is dropped first. The institute
    name and user are read from ``context.obj``; genes (build 37 only) and
    hpo information are loaded from the reduced resource files.
    """
    LOG.info("Running scout setup demo")
    institute_name = context.obj['institute_name']
    user_name = context.obj['user_name']
    user_mail = context.obj['user_mail']

    adapter = context.obj['adapter']

    LOG.info("Setting up database %s", context.obj['mongodb'])
    LOG.info("Deleting previous database")
    for collection_name in adapter.db.collection_names():
        LOG.info("Deleting collection %s", collection_name)
        adapter.db.drop_collection(collection_name)
    LOG.info("Database deleted")

    # Build an institute with id institute_name and add it to the database
    institute_obj = build_institute(
        internal_id=institute_name,
        display_name=institute_name,
        sanger_recipients=[user_mail],
    )
    adapter.add_institute(institute_obj)

    # Build the admin user
    user_obj = dict(
        _id=user_mail,
        email=user_mail,
        name=user_name,
        roles=['admin'],
        institutes=[institute_name],
    )
    adapter.add_user(user_obj)

    # Open the resources that are actually loaded below. Only build 37 genes
    # are loaded, so no separate build 38 handles are opened.
    LOG.info("Loading hgnc genes from %s", hgnc_reduced_path)
    hgnc_handle = get_file_handle(hgnc_reduced_path)

    LOG.info("Loading exac genes from %s", exac_reduced_path)
    exac_handle = get_file_handle(exac_reduced_path)

    LOG.info("Loading mim2gene info from %s", mim2gene_reduced_path)
    mim2gene_handle = get_file_handle(mim2gene_reduced_path)

    LOG.info("Loading genemap info from %s", genemap2_reduced_path)
    genemap_handle = get_file_handle(genemap2_reduced_path)

    LOG.info("Loading hpo gene info from %s", hpogenes_reduced_path)
    hpo_genes_handle = get_file_handle(hpogenes_reduced_path)
    hpo_to_genes_handle = get_file_handle(hpo_to_genes_reduced_path)

    LOG.info("Loading hpo disease info from %s", hpo_phenotype_to_terms_reduced_path)
    hpo_disease_handle = get_file_handle(hpo_phenotype_to_terms_reduced_path)

    LOG.info("Loading hpo terms from %s", hpoterms_reduced_path)
    hpo_terms_handle = get_file_handle(hpoterms_reduced_path)

    # The genemap file is opened a second time: one handle feeds the gene
    # linking, the other the disease loading.
    LOG.info("Loading omim disease info from %s", genemap2_reduced_path)
    disease_handle = get_file_handle(genemap2_reduced_path)

    LOG.info("Loading transcripts build 37 info from %s", transcripts37_reduced_path)
    transcripts37_handle = get_file_handle(transcripts37_reduced_path)

    genes37 = link_genes(
        ensembl_lines=transcripts37_handle,
        hgnc_lines=hgnc_handle,
        exac_lines=exac_handle,
        mim2gene_lines=mim2gene_handle,
        genemap_lines=genemap_handle,
        hpo_lines=hpo_genes_handle,
    )
    load_hgnc_genes(adapter, genes37, build='37')

    load_hpo(
        adapter=adapter,
        hpo_lines=hpo_terms_handle,
        hpo_gene_lines=hpo_to_genes_handle,
        disease_lines=disease_handle,
        hpo_disease_lines=hpo_disease_handle,
    )

    adapter.load_panel(
        path=panel_path,
        institute='cust000',
        panel_id='panel1',
        date=datetime.datetime.now(),
        panel_type='clinical',
        version=1.0,
        display_name='Test panel',
    )

    case_handle = get_file_handle(load_path)
    # safe_load: yaml.load() without an explicit Loader is rejected by
    # current PyYAML releases, and the case config is plain data.
    case_data = yaml.safe_load(case_handle)

    adapter.load_case(case_data)

    LOG.info("Creating indexes")
    adapter.load_indexes()
    LOG.info("Scout demo instance setup successful")
def setup_scout(adapter, institute_id='cust000', user_name='Clark Kent',
                user_mail='*****@*****.**', api_key=None, demo=False):
    """Set up a scout instance from scratch.

    All non-system collections are dropped, an institute and an admin user
    are created, and genes, transcripts and hpo information are loaded for
    builds 37 and 38. In demo mode the reduced resource files are used and a
    demo gene panel and case are loaded as well; otherwise the resources are
    fetched, using ``api_key`` for the OMIM files.

    Raises:
        Exception: whatever ``fetch_mim_files`` raises is logged and
            re-raised.
    """
    # ---- Delete previous information ----
    LOG.info("Deleting previous database")
    for existing_collection in adapter.db.collection_names():
        if existing_collection.startswith('system'):
            continue
        LOG.info("Deleting collection %s", existing_collection)
        adapter.db.drop_collection(existing_collection)
    LOG.info("Database deleted")

    # ---- Add an institute ----
    adapter.add_institute(
        build_institute(
            internal_id=institute_id,
            display_name=institute_id,
            sanger_recipients=[user_mail],
        )
    )

    # ---- Add a user ----
    admin_user = {
        '_id': user_mail,
        'email': user_mail,
        'name': user_name,
        'roles': ['admin'],
        'institutes': [institute_id],
    }
    adapter.add_user(admin_user)

    # ---- Collect the gene resources ----
    if demo:
        # Reduced files shipped with the package
        mim2gene_lines = list(get_file_handle(mim2gene_reduced_path))
        genemap_lines = list(get_file_handle(genemap2_reduced_path))
        hpo_gene_lines = list(get_file_handle(hpogenes_reduced_path))
        hgnc_lines = list(get_file_handle(hgnc_reduced_path))
        exac_lines = list(get_file_handle(exac_reduced_path))
    else:
        try:
            mim_files = fetch_mim_files(
                api_key, mim2genes=True, morbidmap=True, genemap2=True
            )
        except Exception as err:
            LOG.warning(err)
            raise err
        mim2gene_lines = mim_files['mim2genes']
        genemap_lines = mim_files['genemap2']
        hpo_gene_lines = fetch_hpo_genes()
        hgnc_lines = fetch_hgnc()
        exac_lines = fetch_exac_constraint()

    # ---- Load genes and transcripts ----
    for genome_build in ['37', '38']:
        # The demo uses the reduced build 37 files for both builds
        if demo:
            ensembl_gene_lines = get_file_handle(genes37_reduced_path)
        else:
            ensembl_gene_lines = fetch_ensembl_genes(build=genome_build)

        hgnc_genes = load_hgnc_genes(
            adapter=adapter,
            ensembl_lines=ensembl_gene_lines,
            hgnc_lines=hgnc_lines,
            exac_lines=exac_lines,
            mim2gene_lines=mim2gene_lines,
            genemap_lines=genemap_lines,
            hpo_lines=hpo_gene_lines,
            build=genome_build,
        )

        # Gene objects keyed on ensembl id, used when loading transcripts
        genes_by_ensembl_id = {
            gene_obj['ensembl_id']: gene_obj for gene_obj in hgnc_genes
        }

        if demo:
            transcript_lines = get_file_handle(transcripts37_reduced_path)
        else:
            transcript_lines = fetch_ensembl_transcripts(build=genome_build)

        load_transcripts(adapter, transcript_lines, genome_build, genes_by_ensembl_id)

    # ---- Load hpo terms and diseases ----
    # Local hpo files are only supplied in demo mode
    hpo_terms_handle = get_file_handle(hpoterms_reduced_path) if demo else None
    hpo_to_genes_handle = get_file_handle(hpo_to_genes_reduced_path) if demo else None
    hpo_disease_handle = (
        get_file_handle(hpo_phenotype_to_terms_reduced_path) if demo else None
    )

    load_hpo(
        adapter=adapter,
        hpo_lines=hpo_terms_handle,
        hpo_gene_lines=hpo_to_genes_handle,
        disease_lines=genemap_lines,
        hpo_disease_lines=hpo_disease_handle,
    )

    # The demo additionally gets a gene panel and a case
    if demo:
        demo_panel = parse_gene_panel(
            path=panel_path,
            institute='cust000',
            panel_id='panel1',
            version=1.0,
            display_name='Test panel',
        )
        adapter.load_panel(demo_panel)

        case_data = yaml.load(get_file_handle(load_path), Loader=yaml.FullLoader)
        adapter.load_case(case_data)

    LOG.info("Creating indexes")
    adapter.load_indexes()
    LOG.info("Scout instance setup successful")
def genes(build, downloads_folder, api_key):
    """Drop and reload genes and transcripts for one build, or for 37 and 38.

    Resources are read from ``downloads_folder`` when it is given. Otherwise
    they are downloaded into a temporary directory first; a download error
    is logged and the resources are still read from that directory.
    """
    LOG.info("Running scout update genes")
    adapter = store

    if build:
        builds = [build]
    else:
        builds = ["37", "38"]

    api_key = api_key or current_app.config.get("OMIM_API_KEY")
    resources = {}

    if downloads_folder is not None:
        # Previously downloaded resources: read the files from that folder
        fetch_downloaded_resources(resources, downloads_folder, builds)
    else:
        # Nothing downloaded yet: use a temporary directory for the files
        with tempfile.TemporaryDirectory() as tempdir:
            try:
                download_resources(tempdir, api_key, builds)
            except Exception as ex:
                LOG.error(ex)
            fetch_downloaded_resources(resources, tempdir, builds)

    for genome_build in builds:
        LOG.warning("Dropping all gene information")
        adapter.drop_genes(genome_build)
        LOG.warning("Dropping all transcript information")
        adapter.drop_transcripts(genome_build)

        is_build_37 = genome_build == "37"

        genes_key = "ensembl_genes_37" if is_build_37 else "ensembl_genes_38"
        hgnc_genes = load_hgnc_genes(
            adapter=adapter,
            ensembl_lines=resources.get(genes_key),
            hgnc_lines=resources.get("hgnc_lines"),
            exac_lines=resources.get("exac_lines"),
            mim2gene_lines=resources.get("mim2genes"),
            genemap_lines=resources.get("genemap2"),
            hpo_lines=resources.get("hpo_genes"),
            build=genome_build,
        )

        # Gene objects keyed on ensembl id, used when loading transcripts
        genes_by_ensembl_id = {
            gene_obj["ensembl_id"]: gene_obj for gene_obj in hgnc_genes
        }

        tx_key = "ensembl_transcripts_37" if is_build_37 else "ensembl_transcripts_38"
        load_transcripts(
            adapter, resources.get(tx_key), genome_build, genes_by_ensembl_id
        )

    LOG.info("Genes and transcripts loaded")
def database(context, institute_name, user_name, user_mail, api_key):
    """Setup a scout database.

    The OMIM files are fetched first (an api key is required, as ``api_key``
    or ``omim_api_key`` in ``context.obj``). After that all non-system
    collections are dropped, an institute and an admin user are created and
    genes for build 37 and 38 plus the disease terms are loaded. Arguments
    that are falsy fall back to the value with the same name in
    ``context.obj``.
    """
    LOG.info("Running scout setup database")

    # Nothing is deleted before the OMIM information has been fetched
    omim_key = api_key or context.obj.get('omim_api_key')
    if not omim_key:
        LOG.warning("Please provide a omim api key to load the omim gene panel")
        context.abort()

    try:
        mim_files = fetch_mim_files(
            omim_key, mim2genes=True, morbidmap=True, genemap2=True
        )
    except Exception as err:
        LOG.warning(err)
        context.abort()

    institute_name = institute_name or context.obj['institute_name']
    user_name = user_name or context.obj['user_name']
    user_mail = user_mail or context.obj['user_mail']

    adapter = context.obj['adapter']

    LOG.info("Deleting previous database")
    for existing_collection in adapter.db.collection_names():
        if existing_collection.startswith('system'):
            continue
        LOG.info("Deleting collection %s", existing_collection)
        adapter.db.drop_collection(existing_collection)
    LOG.info("Database deleted")

    LOG.info("Setting up database %s", context.obj['mongodb'])

    # Institute with id institute_name, owned by the admin user below
    adapter.add_institute(
        build_institute(
            internal_id=institute_name,
            display_name=institute_name,
            sanger_recipients=[user_mail],
        )
    )

    admin_user = {
        '_id': user_mail,
        'email': user_mail,
        'name': user_name,
        'roles': ['admin'],
        'institutes': [institute_name],
    }
    adapter.add_user(admin_user)

    # Genes to hpo information, shared by both builds
    hpo_genes = fetch_hpo_genes()

    # The builds only differ in which ensembl file is used; the hgnc and
    # exac files are opened again for each build.
    ensembl_paths = (('37', transcripts37_path), ('38', transcripts38_path))
    for genome_build, ensembl_path in ensembl_paths:
        linked_genes = link_genes(
            ensembl_lines=get_file_handle(ensembl_path),
            hgnc_lines=get_file_handle(hgnc_path),
            exac_lines=get_file_handle(exac_path),
            mim2gene_lines=mim_files['mim2genes'],
            genemap_lines=mim_files['genemap2'],
            hpo_lines=hpo_genes,
        )
        load_hgnc_genes(adapter, linked_genes, build=genome_build)

    load_hpo(
        adapter=adapter,
        disease_lines=mim_files['genemap2'],
    )

    LOG.info("Creating indexes")
    adapter.load_indexes()
    LOG.info("Scout instance setup successful")