Exemplo n.º 1
0
def test_get_mim_phenotypes_file(genemap_handle):
    """Check that every phenotype parsed from the genemap lines has a mim number

    Args:
        genemap_handle(iterable(str)): passed on as genemap_lines to
            get_mim_phenotypes (presumably a fixture with genemap2 lines;
            verify against the test configuration)

    """
    phenotypes = get_mim_phenotypes(genemap_lines=genemap_handle)

    # Bind the index before the loop: if nothing was parsed the loop body never
    # runs and the final assert should fail cleanly instead of raising NameError
    i = -1
    for i, mim_nr in enumerate(phenotypes):
        assert phenotypes[mim_nr]['mim_number']

    # i is the index of the last phenotype, so this demands more than one entry
    assert i > 0
Exemplo n.º 2
0
def load_disease_terms(adapter, genemap_lines, genes, hpo_disease_lines):
    """Load the omim phenotypes into the database

    Parse the phenotypes from genemap2.txt and find the associated hpo terms
    from ALL_SOURCES_ALL_FREQUENCIES_diseases_to_genes_to_phenotypes.txt.

    Each parsed phenotype is converted with build_disease_term and stored
    through adapter.load_disease_term. The number of loaded diseases and the
    elapsed time are logged when the loop is done.

    Args:
        adapter(MongoAdapter)
        genemap_lines(iterable(str))
        genes(dict): Dictionary with all genes found in database
        hpo_disease_lines(iterable(str))

    """

    disease_terms = get_mim_phenotypes(genemap_lines=genemap_lines)
    hpo_diseases = parse_hpo_diseases(hpo_disease_lines)

    start_time = datetime.now()
    # Bound up front so the summary log works even when no disease was parsed
    nr_diseases = 0

    logger.info("Loading the hpo disease...")
    # Count from 1 so that nr_diseases ends up as the number of diseases
    # loaded rather than the index of the last one
    for nr_diseases, disease_number in enumerate(disease_terms, 1):
        disease_info = disease_terms[disease_number]
        disease_id = "OMIM:{0}".format(disease_number)

        # Only attach hpo terms when the hpo data has a non empty entry
        if disease_id in hpo_diseases:
            hpo_terms = hpo_diseases[disease_id]['hpo_terms']
            if hpo_terms:
                disease_info['hpo_terms'] = hpo_terms
        disease_obj = build_disease_term(disease_info, genes)

        adapter.load_disease_term(disease_obj)

    logger.info("Loading done. Nr of diseases loaded {0}".format(nr_diseases))
    logger.info("Time to load diseases: {0}".format(datetime.now() -
                                                    start_time))
Exemplo n.º 3
0
def test_get_mim_phenotypes_file(genemap_handle):
    """Check that every phenotype parsed from the genemap lines has a mim number

    Args:
        genemap_handle(iterable(str)): passed on as genemap_lines to
            get_mim_phenotypes (presumably a fixture with genemap2 lines;
            verify against the test configuration)

    """
    phenotypes = get_mim_phenotypes(genemap_lines=genemap_handle)

    # Bind the index before the loop: if nothing was parsed the loop body never
    # runs and the final assert should fail cleanly instead of raising NameError
    i = -1
    for i, mim_nr in enumerate(phenotypes):
        assert phenotypes[mim_nr]['mim_number']

    # i is the index of the last phenotype, so this demands more than one entry
    assert i > 0
        
Exemplo n.º 4
0
def load_disease_terms(adapter,
                       genemap_lines,
                       genes=None,
                       hpo_disease_lines=None):
    """Load the omim phenotypes into the database

    Parse the phenotypes from genemap2.txt and find the associated hpo terms
    from https://ci.monarchinitiative.org/view/hpo/job/hpo.annotations/lastSuccessfulBuild/artifact/rare-diseases/util/annotation/phenotype_to_genes.txt

    Each parsed phenotype is converted with build_disease_term and stored
    through adapter.load_disease_term. The number of loaded diseases and the
    elapsed time are logged when the loop is done.

    Args:
        adapter(MongoAdapter)
        genemap_lines(iterable(str))
        genes(dict): Dictionary with all genes found in database. Fetched with
            adapter.genes_by_alias() when empty or not given
        hpo_disease_lines(iterable(str)): Fetched with
            fetch_hpo_to_genes_to_disease() when empty or not given

    """
    # Get a map with hgnc symbols to hgnc ids from scout
    if not genes:
        genes = adapter.genes_by_alias()

    # Fetch the disease terms from omim
    disease_terms = get_mim_phenotypes(genemap_lines=genemap_lines)

    if not hpo_disease_lines:
        hpo_disease_lines = fetch_hpo_to_genes_to_disease()
    hpo_diseases = parse_hpo_diseases(hpo_disease_lines)

    start_time = datetime.now()
    # Zero rather than None so the summary reports a real count when nothing
    # was parsed
    nr_diseases = 0

    LOG.info("Loading the hpo disease...")
    # Count from 1 so that nr_diseases ends up as the number of diseases
    # loaded rather than the index of the last one
    for nr_diseases, disease_number in enumerate(disease_terms, 1):
        disease_info = disease_terms[disease_number]
        disease_id = "OMIM:{0}".format(disease_number)

        # Only attach hpo terms when the hpo data has a non empty entry
        if disease_id in hpo_diseases:
            hpo_terms = hpo_diseases[disease_id]["hpo_terms"]
            if hpo_terms:
                disease_info["hpo_terms"] = hpo_terms
        disease_obj = build_disease_term(disease_info, genes)

        adapter.load_disease_term(disease_obj)

    LOG.info("Loading done. Nr of diseases loaded {0}".format(nr_diseases))
    LOG.info("Time to load diseases: {0}".format(datetime.now() - start_time))
Exemplo n.º 5
0
def load_disease_terms(adapter, genemap_lines, genes=None, hpo_disease_lines=None):
    """Load the omim phenotypes into the database

    Parse the phenotypes from genemap2.txt and find the associated hpo terms
    from ALL_SOURCES_ALL_FREQUENCIES_diseases_to_genes_to_phenotypes.txt.

    Each parsed phenotype is converted with build_disease_term and stored
    through adapter.load_disease_term. The number of loaded diseases and the
    elapsed time are logged when the loop is done.

    Args:
        adapter(MongoAdapter)
        genemap_lines(iterable(str))
        genes(dict): Dictionary with all genes found in database. Fetched with
            adapter.genes_by_alias() when empty or not given
        hpo_disease_lines(iterable(str)): Fetched with
            fetch_hpo_phenotype_to_terms() when empty or not given

    """
    # Get a map with hgnc symbols to hgnc ids from scout
    if not genes:
        genes = adapter.genes_by_alias()

    # Fetch the disease terms from omim
    disease_terms = get_mim_phenotypes(genemap_lines=genemap_lines)

    if not hpo_disease_lines:
        hpo_disease_lines = fetch_hpo_phenotype_to_terms()
    hpo_diseases = parse_hpo_diseases(hpo_disease_lines)

    start_time = datetime.now()
    # Zero rather than None so the summary reports a real count when nothing
    # was parsed
    nr_diseases = 0

    LOG.info("Loading the hpo disease...")
    # Count from 1 so that nr_diseases ends up as the number of diseases
    # loaded rather than the index of the last one
    for nr_diseases, disease_number in enumerate(disease_terms, 1):
        disease_info = disease_terms[disease_number]
        disease_id = "OMIM:{0}".format(disease_number)

        # Only attach hpo terms when the hpo data has a non empty entry
        if disease_id in hpo_diseases:
            hpo_terms = hpo_diseases[disease_id]['hpo_terms']
            if hpo_terms:
                disease_info['hpo_terms'] = hpo_terms
        disease_obj = build_disease_term(disease_info, genes)

        adapter.load_disease_term(disease_obj)

    LOG.info("Loading done. Nr of diseases loaded {0}".format(nr_diseases))
    LOG.info("Time to load diseases: {0}".format(datetime.now() - start_time))
Exemplo n.º 6
0
def test_get_mim_phenotypes():
    """Parse GENEMAP_LINES and verify the phenotypes that come out of it"""
    ## GIVEN a small testdata set

    ## WHEN parsing the phenotypes
    # The result is a dictionary keyed on mim number with phenotypes as values
    parsed = get_mim_phenotypes(genemap_lines=GENEMAP_LINES)

    ## THEN assert they were parsed in a correct way

    # A single line in GENEMAP_LINES carries phenotypes, two of them, so two
    # entries are expected
    nr_parsed = len(parsed)
    assert nr_parsed == 2

    entry = parsed[615349]
    expected_inheritance = {'AR'}
    expected_symbols = {'B3GALT6'}

    assert entry['inheritance'] == expected_inheritance
    assert entry['hgnc_symbols'] == expected_symbols
Exemplo n.º 7
0
def test_get_mim_phenotypes():
    """Parse GENEMAP_LINES and verify the phenotypes that come out of it"""
    ## GIVEN a small testdata set

    ## WHEN parsing the phenotypes
    # The result is a dictionary keyed on mim number with phenotypes as values
    parsed = get_mim_phenotypes(genemap_lines=GENEMAP_LINES)

    ## THEN assert they were parsed in a correct way

    # A single line in GENEMAP_LINES carries phenotypes, two of them, so two
    # entries are expected
    nr_parsed = len(parsed)
    assert nr_parsed == 2

    entry = parsed[615349]
    expected_inheritance = {'AR'}
    expected_symbols = {'B3GALT6'}

    assert entry['inheritance'] == expected_inheritance
    assert entry['hgnc_symbols'] == expected_symbols