def test_get_mim_phenotypes_file(genemap_handle):
    """Check that every phenotype parsed from the genemap handle has a mim number.

    Args:
        genemap_handle: pytest fixture that is passed on as ``genemap_lines``
            (presumably an iterable over the lines of a genemap file)
    """
    ## GIVEN a handle with genemap lines
    ## WHEN parsing the phenotypes
    phenotypes = get_mim_phenotypes(genemap_lines=genemap_handle)

    ## THEN every phenotype should have a truthy mim number
    for mim_nr in phenotypes:
        assert phenotypes[mim_nr]['mim_number']

    ## THEN more than one phenotype should have been parsed
    # Use len() instead of the leaked loop index: the index is never bound
    # when nothing was parsed, which gives a NameError instead of a clean
    # assertion failure
    assert len(phenotypes) > 1
def load_disease_terms(adapter, genemap_lines, genes, hpo_disease_lines):
    """Load the omim phenotypes into the database

    Parse the phenotypes from genemap2.txt and find the associated hpo terms
    from ALL_SOURCES_ALL_FREQUENCIES_diseases_to_genes_to_phenotypes.txt.

    Every disease term is built with build_disease_term and handed to
    adapter.load_disease_term. The number of loaded terms and the elapsed
    time are logged when done.

    Args:
        adapter(MongoAdapter)
        genemap_lines(iterable(str))
        genes(dict): Dictionary with all genes found in database
        hpo_disease_lines(iterable(str))
    """
    disease_terms = get_mim_phenotypes(genemap_lines=genemap_lines)
    hpo_diseases = parse_hpo_diseases(hpo_disease_lines)

    start_time = datetime.now()
    # Start at zero so the summary below also works without disease terms
    nr_diseases = 0

    logger.info("Loading the hpo disease...")
    # Count from 1 so nr_diseases ends up as the number of loaded terms
    # and not as the index of the last one
    for nr_diseases, disease_number in enumerate(disease_terms, 1):
        disease_info = disease_terms[disease_number]
        disease_id = "OMIM:{0}".format(disease_number)

        # Only add hpo terms when the disease has a non empty set of them
        if disease_id in hpo_diseases:
            hpo_terms = hpo_diseases[disease_id]['hpo_terms']
            if hpo_terms:
                disease_info['hpo_terms'] = hpo_terms

        disease_obj = build_disease_term(disease_info, genes)
        adapter.load_disease_term(disease_obj)

    logger.info("Loading done. Nr of diseases loaded {0}".format(nr_diseases))
    logger.info("Time to load diseases: {0}".format(datetime.now() - start_time))
def test_get_mim_phenotypes_file(genemap_handle):
    """Check that the phenotypes parsed from the genemap handle all carry a mim number.

    Args:
        genemap_handle: pytest fixture that is passed on as ``genemap_lines``
            (presumably an iterable over the lines of a genemap file)
    """
    ## GIVEN a handle with genemap lines
    ## WHEN parsing the phenotypes
    phenotypes = get_mim_phenotypes(genemap_lines=genemap_handle)

    ## THEN every phenotype should have a truthy mim number
    for mim_nr in phenotypes:
        assert phenotypes[mim_nr]['mim_number']

    ## THEN more than one phenotype should have been parsed
    # The loop index is not used for this check since it is unbound (NameError
    # rather than an assertion failure) when the parser returns nothing
    assert len(phenotypes) > 1
def load_disease_terms(adapter, genemap_lines, genes=None, hpo_disease_lines=None):
    """Load the omim phenotypes into the database

    Parse the phenotypes from genemap2.txt and find the associated hpo terms
    from https://ci.monarchinitiative.org/view/hpo/job/hpo.annotations/lastSuccessfulBuild/artifact/rare-diseases/util/annotation/phenotype_to_genes.txt

    Every disease term is built with build_disease_term and handed to
    adapter.load_disease_term. The number of loaded terms and the elapsed
    time are logged when done.

    Args:
        adapter(MongoAdapter)
        genemap_lines(iterable(str))
        genes(dict): Dictionary with all genes found in database.
            Fetched with adapter.genes_by_alias() if missing or empty
        hpo_disease_lines(iterable(str)): Fetched with
            fetch_hpo_to_genes_to_disease() if missing or empty
    """
    # Get a map with hgnc symbols to hgnc ids from scout
    if not genes:
        genes = adapter.genes_by_alias()

    # Fetch the disease terms from omim
    disease_terms = get_mim_phenotypes(genemap_lines=genemap_lines)

    if not hpo_disease_lines:
        hpo_disease_lines = fetch_hpo_to_genes_to_disease()
    hpo_diseases = parse_hpo_diseases(hpo_disease_lines)

    start_time = datetime.now()
    # Start at zero so the summary reports 0 (not None) without disease terms
    nr_diseases = 0

    LOG.info("Loading the hpo disease...")
    # Count from 1 so nr_diseases ends up as the number of loaded terms
    # and not as the index of the last one
    for nr_diseases, disease_number in enumerate(disease_terms, 1):
        disease_info = disease_terms[disease_number]
        disease_id = "OMIM:{0}".format(disease_number)

        # Only add hpo terms when the disease has a non empty set of them
        if disease_id in hpo_diseases:
            hpo_terms = hpo_diseases[disease_id]["hpo_terms"]
            if hpo_terms:
                disease_info["hpo_terms"] = hpo_terms

        disease_obj = build_disease_term(disease_info, genes)
        adapter.load_disease_term(disease_obj)

    LOG.info("Loading done. Nr of diseases loaded {0}".format(nr_diseases))
    LOG.info("Time to load diseases: {0}".format(datetime.now() - start_time))
def load_disease_terms(adapter, genemap_lines, genes=None, hpo_disease_lines=None):
    """Load the omim phenotypes into the database

    Parse the phenotypes from genemap2.txt and find the associated hpo terms
    from ALL_SOURCES_ALL_FREQUENCIES_diseases_to_genes_to_phenotypes.txt.

    Every disease term is built with build_disease_term and handed to
    adapter.load_disease_term. The number of loaded terms and the elapsed
    time are logged when done.

    Args:
        adapter(MongoAdapter)
        genemap_lines(iterable(str))
        genes(dict): Dictionary with all genes found in database.
            Fetched with adapter.genes_by_alias() if missing or empty
        hpo_disease_lines(iterable(str)): Fetched with
            fetch_hpo_phenotype_to_terms() if missing or empty
    """
    # Get a map with hgnc symbols to hgnc ids from scout
    if not genes:
        genes = adapter.genes_by_alias()

    # Fetch the disease terms from omim
    disease_terms = get_mim_phenotypes(genemap_lines=genemap_lines)

    if not hpo_disease_lines:
        hpo_disease_lines = fetch_hpo_phenotype_to_terms()
    hpo_diseases = parse_hpo_diseases(hpo_disease_lines)

    start_time = datetime.now()
    # Start at zero so the summary reports 0 (not None) without disease terms
    nr_diseases = 0

    LOG.info("Loading the hpo disease...")
    # Count from 1 so nr_diseases ends up as the number of loaded terms
    # and not as the index of the last one
    for nr_diseases, disease_number in enumerate(disease_terms, 1):
        disease_info = disease_terms[disease_number]
        disease_id = "OMIM:{0}".format(disease_number)

        # Only add hpo terms when the disease has a non empty set of them
        if disease_id in hpo_diseases:
            hpo_terms = hpo_diseases[disease_id]['hpo_terms']
            if hpo_terms:
                disease_info['hpo_terms'] = hpo_terms

        disease_obj = build_disease_term(disease_info, genes)
        adapter.load_disease_term(disease_obj)

    LOG.info("Loading done. Nr of diseases loaded {0}".format(nr_diseases))
    LOG.info("Time to load diseases: {0}".format(datetime.now() - start_time))
def test_get_mim_phenotypes():
    """Parse the small GENEMAP_LINES test data and check one of the phenotypes."""
    ## GIVEN a small set of genemap lines (GENEMAP_LINES)

    ## WHEN the phenotypes are parsed
    # The result is a dictionary with mim numbers as keys and
    # phenotypes as values
    parsed = get_mim_phenotypes(genemap_lines=GENEMAP_LINES)

    ## THEN assert that they were parsed in a correct way
    # Only one line in GENEMAP_LINES has phenotypes and it has two of them,
    # so exactly two phenotypes are expected
    assert len(parsed) == 2

    phenotype = parsed[615349]
    assert phenotype['inheritance'] == {'AR'}
    assert phenotype['hgnc_symbols'] == {'B3GALT6'}
def test_get_mim_phenotypes():
    """Check the phenotypes that are parsed from the GENEMAP_LINES test data."""
    ## GIVEN the small test data set in GENEMAP_LINES

    ## WHEN parsing the phenotypes into a dictionary with
    ## mim numbers as keys and phenotypes as values
    mim_phenotypes = get_mim_phenotypes(genemap_lines=GENEMAP_LINES)

    ## THEN assert that they were parsed in a correct way
    # A single line in GENEMAP_LINES holds two phenotypes, so the
    # parsed dictionary should have two entries
    expected_nr_phenotypes = 2
    assert len(mim_phenotypes) == expected_nr_phenotypes

    entry = mim_phenotypes[615349]
    assert entry['inheritance'] == {'AR'}
    assert entry['hgnc_symbols'] == {'B3GALT6'}