示例#1
0
def test_load_transcripts(adapter, gene_bulk, transcripts_handle):
    """Check that genes and transcripts end up in a previously empty database.

    Args:
        adapter: database adapter fixture exposing ``all_genes``,
            ``transcripts`` and ``load_hgnc_bulk``
        gene_bulk: sized collection of genes handed to ``load_hgnc_bulk``
        transcripts_handle: transcript lines passed on to ``load_transcripts``
    """

    def nr_items(iterable):
        # Count by iterating, so nothing beyond iterability is required
        return sum(1 for _ in iterable)

    # GIVEN an empty database
    assert nr_items(adapter.all_genes()) == 0
    assert nr_items(adapter.transcripts()) == 0

    # WHEN inserting a number of genes and some transcripts
    adapter.load_hgnc_bulk(gene_bulk)
    load_transcripts(adapter, transcripts_lines=transcripts_handle, build="37")

    # THEN all genes have been added to the database
    assert nr_items(adapter.all_genes()) == len(gene_bulk)

    # THEN at least one transcript was loaded
    assert nr_items(adapter.transcripts()) > 0
def test_load_transcripts(adapter, gene_bulk, transcripts_handle):
    """Check that genes and transcripts are stored when loaded from a handle.

    NOTE(review): an earlier function in this file has the same name; if both
    live in one module, only this later definition is collected by pytest.

    Args:
        adapter: database adapter fixture exposing ``all_genes``,
            ``transcripts`` and ``load_hgnc_bulk``
        gene_bulk: sized collection of genes handed to ``load_hgnc_bulk``
        transcripts_handle: transcript lines passed on to ``load_transcripts``
    """
    # Results are counted by iterating instead of calling ``.count()`` on them:
    # ``Cursor.count()`` was deprecated in PyMongo 3.7 and removed in 4.0.

    # GIVEN an empty database
    assert sum(1 for _ in adapter.all_genes()) == 0
    assert sum(1 for _ in adapter.transcripts()) == 0

    # WHEN inserting a number of genes and some transcripts
    adapter.load_hgnc_bulk(gene_bulk)

    load_transcripts(adapter, transcripts_lines=transcripts_handle, build='37')

    # THEN assert all genes have been added to the database
    assert sum(1 for _ in adapter.all_genes()) == len(gene_bulk)

    # THEN assert that the transcripts were loaded
    assert sum(1 for _ in adapter.transcripts()) > 0
def test_load_transcripts_request(adapter, gene_bulk, transcripts_df):
    """Check that transcripts supplied by ``transcripts_df`` are stored.

    Args:
        adapter: database adapter fixture exposing ``all_genes``,
            ``transcripts`` and ``load_hgnc_bulk``
        gene_bulk: sized collection of genes handed to ``load_hgnc_bulk``
        transcripts_df: transcript data passed to ``load_transcripts`` as
            ``transcripts_lines`` (presumably a data frame, going by the
            fixture name -- confirm against the fixture definition)
    """
    # Results are counted by iterating instead of calling ``.count()`` on them:
    # ``Cursor.count()`` was deprecated in PyMongo 3.7 and removed in 4.0.

    # GIVEN an empty database
    assert sum(1 for _ in adapter.all_genes()) == 0
    assert sum(1 for _ in adapter.transcripts()) == 0

    # WHEN inserting a number of genes and some transcripts
    adapter.load_hgnc_bulk(gene_bulk)

    load_transcripts(adapter, transcripts_lines=transcripts_df, build='37')

    # THEN assert all genes have been added to the database
    assert sum(1 for _ in adapter.all_genes()) == len(gene_bulk)

    # THEN assert that the transcripts were loaded
    assert sum(1 for _ in adapter.transcripts()) > 0
示例#4
0
def test_load_exons(adapter, gene_bulk, transcripts_file, exons_handle):
    """Check that exons can be loaded once genes and transcripts are in place.

    Args:
        adapter: database adapter fixture exposing ``all_genes``,
            ``transcripts``, ``exons``, ``load_hgnc_bulk`` and
            ``transcript_collection``
        gene_bulk: genes handed to ``load_hgnc_bulk``
        transcripts_file: path opened with ``get_file_handle``
        exons_handle: exon lines passed on to ``load_exons``
    """
    # GIVEN an empty database: no genes, no transcripts and no exons
    for fetch in (adapter.all_genes, adapter.transcripts, adapter.exons):
        assert sum(1 for _ in fetch()) == 0

    # WHEN inserting a number of genes, some transcripts and the exons
    adapter.load_hgnc_bulk(gene_bulk)

    transcript_lines = get_file_handle(transcripts_file)
    load_transcripts(adapter, transcript_lines, build="37")

    # Index the transcripts on build + hgnc_id before the exons are loaded
    # (presumably to speed up transcript lookups in load_exons -- confirm)
    transcript_index = [
        ("build", pymongo.ASCENDING),
        ("hgnc_id", pymongo.ASCENDING),
    ]
    adapter.transcript_collection.create_index(transcript_index)

    # nr_exons is presumably the number of exon lines in the fixture -- confirm
    load_exons(adapter, exons_handle, build="37", nr_exons=19826)

    # THEN more than one exon is found in the database
    assert sum(1 for _ in adapter.exons()) > 1
示例#5
0
def gene_database(request, institute_database, genes):
    """Return the ``institute_database`` adapter with genes and transcripts loaded.

    Genes are loaded for build 37 from ``genes``; transcripts are read from
    ``transcripts37_reduced_path``. An index is created on the hgnc
    collection and on the transcript collection along the way.

    Args:
        request: not used in the body
        institute_database: adapter to populate; the same object is returned
        genes: parsed genes forwarded to ``load_hgnc_genes``
    """
    adapter = institute_database

    hgnc_index = [
        ('build', pymongo.ASCENDING),
        ('hgnc_symbol', pymongo.ASCENDING),
    ]
    transcript_index = [
        ('build', pymongo.ASCENDING),
        ('hgnc_id', pymongo.ASCENDING),
    ]

    # Genes go in first; the hgnc index is built right after they are loaded
    load_hgnc_genes(adapter=adapter, genes=genes, build='37')

    LOG.info("Creating index on hgnc collection")
    adapter.hgnc_collection.create_index(hgnc_index)

    # Transcripts next, followed by their own index
    load_transcripts(
        adapter,
        get_file_handle(transcripts37_reduced_path),
        build='37',
    )
    adapter.transcript_collection.create_index(transcript_index)

    LOG.info("Index done")

    return adapter
示例#6
0
def load_hgnc(
    adapter,
    genes=None,
    ensembl_lines=None,
    hgnc_lines=None,
    exac_lines=None,
    mim2gene_lines=None,
    genemap_lines=None,
    hpo_lines=None,
    transcripts_lines=None,
    build="37",
    omim_api_key="",
):
    """Load genes first, then transcripts, into the database.

    The gene resources are forwarded to ``load_hgnc_genes``; the gene objects
    it returns are keyed on their ensembl id and handed, together with
    ``transcripts_lines``, to ``load_transcripts``.

    Resources left as ``None`` are, according to the original documentation
    of this function, fetched by the underlying loaders.

    Args:
        adapter(scout.adapter.MongoAdapter)
        genes(dict): Already parsed genes, if available
        ensembl_lines(iterable(str)): Lines with ensembl gene information
        hgnc_lines(iterable(str)): Lines with gene information from genenames.org
        exac_lines(iterable(str)): Lines with pLi-scores from ExAC
        mim2gene_lines(iterable(str)): Lines mapping omim id to gene symbol
        genemap_lines(iterable(str)): Lines with information on omim entries
        hpo_lines(iterable(str)): Lines mapping hpo terms to genes
        transcripts_lines(iterable): Ensembl transcript lines
        build(str): What build to use. Defaults to '37'
        omim_api_key(str): Forwarded unchanged to ``load_hgnc_genes``.
            Defaults to an empty string

    """
    gene_objs = load_hgnc_genes(
        adapter=adapter,
        genes=genes,
        ensembl_lines=ensembl_lines,
        hgnc_lines=hgnc_lines,
        exac_lines=exac_lines,
        mim2gene_lines=mim2gene_lines,
        genemap_lines=genemap_lines,
        hpo_lines=hpo_lines,
        build=build,
        omim_api_key=omim_api_key,
    )

    # Key the loaded genes on ensembl id; the mapping is passed on to the
    # transcript loader below
    ensembl_genes = {gene["ensembl_id"]: gene for gene in gene_objs}

    transcript_objs = load_transcripts(
        adapter=adapter,
        transcripts_lines=transcripts_lines,
        build=build,
        ensembl_genes=ensembl_genes,
    )