def test_load_transcripts(adapter, gene_bulk, transcripts_handle):
    """Check that transcripts can be loaded after a bulk insert of genes.

    Args:
        adapter: database adapter fixture exposing all_genes(), transcripts()
            and load_hgnc_bulk()
        gene_bulk: sized collection of genes handed to load_hgnc_bulk()
        transcripts_handle: transcript lines passed on to load_transcripts()
    """

    def _nr_documents(documents):
        # The adapter results are only iterated here, so count by walking them
        return sum(1 for _ in documents)

    # GIVEN an empty database
    assert _nr_documents(adapter.all_genes()) == 0
    assert _nr_documents(adapter.transcripts()) == 0

    # WHEN inserting a number of genes and some transcripts
    adapter.load_hgnc_bulk(gene_bulk)
    load_transcripts(adapter, transcripts_lines=transcripts_handle, build="37")

    # THEN assert all genes have been added to the database
    nr_genes = _nr_documents(adapter.all_genes())
    assert nr_genes == len(gene_bulk)

    # THEN assert that the transcripts were loaded
    nr_transcripts = _nr_documents(adapter.transcripts())
    assert nr_transcripts > 0
def test_load_transcripts(adapter, gene_bulk, transcripts_handle):
    """Check that transcripts can be loaded after a bulk insert of genes.

    Documents are counted by iterating the adapter results, so the test does
    not rely on the returned object having a ``.count()`` method (PyMongo 4
    removed ``Cursor.count()``).

    Args:
        adapter: database adapter fixture exposing all_genes(), transcripts()
            and load_hgnc_bulk()
        gene_bulk: sized collection of genes handed to load_hgnc_bulk()
        transcripts_handle: transcript lines passed on to load_transcripts()
    """
    # GIVEN an empty database
    assert sum(1 for _ in adapter.all_genes()) == 0
    assert sum(1 for _ in adapter.transcripts()) == 0

    # WHEN inserting a number of genes and some transcripts
    adapter.load_hgnc_bulk(gene_bulk)
    load_transcripts(adapter, transcripts_lines=transcripts_handle, build='37')

    # THEN assert all genes have been added to the database
    assert sum(1 for _ in adapter.all_genes()) == len(gene_bulk)

    # THEN assert that the transcripts were loaded
    assert sum(1 for _ in adapter.transcripts()) > 0
def test_load_transcripts_request(adapter, gene_bulk, transcripts_df):
    """Check that transcripts supplied via the transcripts_df fixture get loaded.

    Documents are counted by iterating the adapter results, so the test does
    not rely on the returned object having a ``.count()`` method (PyMongo 4
    removed ``Cursor.count()``).

    Args:
        adapter: database adapter fixture exposing all_genes(), transcripts()
            and load_hgnc_bulk()
        gene_bulk: sized collection of genes handed to load_hgnc_bulk()
        transcripts_df: transcript data passed to load_transcripts() as its
            transcripts_lines argument
    """
    # GIVEN an empty database
    assert sum(1 for _ in adapter.all_genes()) == 0
    assert sum(1 for _ in adapter.transcripts()) == 0

    # WHEN inserting a number of genes and some transcripts
    adapter.load_hgnc_bulk(gene_bulk)
    load_transcripts(adapter, transcripts_lines=transcripts_df, build='37')

    # THEN assert all genes have been added to the database
    assert sum(1 for _ in adapter.all_genes()) == len(gene_bulk)

    # THEN assert that the transcripts were loaded
    assert sum(1 for _ in adapter.transcripts()) > 0
def test_load_exons(adapter, gene_bulk, transcripts_file, exons_handle):
    """Check that exons can be loaded once genes and transcripts are in place.

    Args:
        adapter: database adapter fixture exposing all_genes(), transcripts(),
            exons(), load_hgnc_bulk() and transcript_collection
        gene_bulk: genes handed to load_hgnc_bulk()
        transcripts_file: path opened with get_file_handle() to obtain the
            transcript lines
        exons_handle: exon lines passed on to load_exons()
    """

    def _nr_documents(documents):
        # The adapter results are only iterated here, so count by walking them
        return sum(1 for _ in documents)

    # GIVEN an empty database
    assert _nr_documents(adapter.all_genes()) == 0
    assert _nr_documents(adapter.transcripts()) == 0
    assert _nr_documents(adapter.exons()) == 0

    # WHEN inserting a number of genes and some transcripts and the exons
    adapter.load_hgnc_bulk(gene_bulk)
    load_transcripts(adapter, get_file_handle(transcripts_file), build="37")

    # Index the transcripts on (build, hgnc_id) before the exons are loaded
    transcript_index = [
        ("build", pymongo.ASCENDING),
        ("hgnc_id", pymongo.ASCENDING),
    ]
    adapter.transcript_collection.create_index(transcript_index)

    load_exons(adapter, exons_handle, build="37", nr_exons=19826)

    # THEN assert that more than one exon was loaded
    assert _nr_documents(adapter.exons()) > 1
def gene_database(request, institute_database, genes):
    """Return the institute adapter after adding genes and transcripts to it.

    Genes are loaded for build '37' and the hgnc collection is indexed on
    (build, hgnc_symbol). The reduced build 37 transcripts file is then loaded
    and the transcript collection is indexed on (build, hgnc_id).

    Args:
        request: accepted but not used in the body
        institute_database: adapter to extend (per the original docstring it
            is already populated with user, institute and case)
        genes: parsed genes forwarded to load_hgnc_genes()

    Returns:
        The same adapter object that was passed in as institute_database.
    """
    adapter = institute_database

    # Only the loading side effect is needed; the returned genes are not used
    load_hgnc_genes(adapter=adapter, genes=genes, build='37')

    LOG.info("Creating index on hgnc collection")
    hgnc_index = [
        ('build', pymongo.ASCENDING),
        ('hgnc_symbol', pymongo.ASCENDING),
    ]
    adapter.hgnc_collection.create_index(hgnc_index)

    load_transcripts(
        adapter,
        get_file_handle(transcripts37_reduced_path),
        build='37',
    )

    transcript_index = [
        ('build', pymongo.ASCENDING),
        ('hgnc_id', pymongo.ASCENDING),
    ]
    adapter.transcript_collection.create_index(transcript_index)
    LOG.info("Index done")

    return adapter
def load_hgnc(
    adapter,
    genes=None,
    ensembl_lines=None,
    hgnc_lines=None,
    exac_lines=None,
    mim2gene_lines=None,
    genemap_lines=None,
    hpo_lines=None,
    transcripts_lines=None,
    build="37",
    omim_api_key="",
):
    """Load genes and then their transcripts into the database.

    The gene resources are handed to load_hgnc_genes(); the gene objects it
    returns are indexed by ensembl id and passed to load_transcripts().
    According to the original documentation, resources that are not provided
    will be fetched.

    Args:
        adapter(scout.adapter.MongoAdapter)
        genes(dict): If genes are already parsed
        ensembl_lines(iterable(str)): Lines formatted with ensembl gene information
        hgnc_lines(iterable(str)): Lines with gene information from genenames.org
        exac_lines(iterable(str)): Lines with pLi-score information from ExAC
        mim2gene_lines(iterable(str)): Lines with map from omim id to gene symbol
        genemap_lines(iterable(str)): Lines with information of omim entries
        hpo_lines(iterable(str)): Lines with the map from hpo terms to genes
        transcripts_lines(iterable): iterable with ensembl transcript lines
        build(str): What build to use. Defaults to '37'
        omim_api_key(str): Forwarded unchanged to load_hgnc_genes. Defaults to ''
    """
    loaded_genes = load_hgnc_genes(
        adapter=adapter,
        genes=genes,
        ensembl_lines=ensembl_lines,
        hgnc_lines=hgnc_lines,
        exac_lines=exac_lines,
        mim2gene_lines=mim2gene_lines,
        genemap_lines=genemap_lines,
        hpo_lines=hpo_lines,
        build=build,
        omim_api_key=omim_api_key,
    )

    # Map ensembl id -> gene object; a repeated id keeps the last gene seen
    genes_by_ensembl_id = {gene["ensembl_id"]: gene for gene in loaded_genes}

    load_transcripts(
        adapter=adapter,
        transcripts_lines=transcripts_lines,
        build=build,
        ensembl_genes=genes_by_ensembl_id,
    )