Exemplo n.º 1
0
def add_ensembl_info(genes, ensembl_lines):
    """Copy ensembl coordinates onto the matching genes

    Every parsed ensembl gene that carries an hgnc id is looked up in
    ``genes``. On a hit the gene is updated in place with the keys
    ``chromosome``, ``start``, ``end`` and ``ensembl_gene_id``.

    Args:
        genes(dict): Dictionary with all genes, looked up by hgnc id
        ensembl_lines(iterable): Iterable with raw ensembl info
    """
    # (key written on the gene, key read from the parsed ensembl gene).
    # The ensembl gene id comes last: ensembl ids can differ between builds and
    # the ids from HGNC are only true for build 38, so the id from ensembl
    # replaces whatever the gene already holds.
    copied_fields = (
        ("chromosome", "chrom"),
        ("start", "gene_start"),
        ("end", "gene_end"),
        ("ensembl_gene_id", "ensembl_gene_id"),
    )

    LOG.info("Adding ensembl coordinates")

    for parsed_gene in parse_ensembl_genes(ensembl_lines):
        if "hgnc_id" not in parsed_gene:
            LOG.debug(
                "Ensembl gene %s is missing hgnc id. Skipping",
                parsed_gene.get("ensembl_gene_id"),
            )
            continue

        matched_gene = genes.get(parsed_gene["hgnc_id"])
        if matched_gene:
            # Copy one field at a time, in a fixed order
            for gene_key, ensembl_key in copied_fields:
                matched_gene[gene_key] = parsed_gene[ensembl_key]
Exemplo n.º 2
0
def add_ensembl_info(genes, ensembl_lines):
    """Add the coordinates from ensembl

    Every gene in ``genes`` that is matched by the hgnc id of a parsed ensembl
    gene is updated in place with the keys ``chromosome``, ``start``, ``end``
    and ``ensembl_gene_id``. Parsed ensembl genes without an hgnc id are
    skipped.

    Args:
        genes(dict): Dictionary with all genes, looked up by hgnc id
        ensembl_lines(iterable|DataFrame): Raw ensembl info. A DataFrame is
            parsed with parse_ensembl_gene_request, anything else with
            parse_ensembl_genes
    """

    LOG.info("Adding ensembl coordinates")
    # Parse and add the ensembl gene info
    if isinstance(ensembl_lines, DataFrame):
        ensembl_genes = parse_ensembl_gene_request(ensembl_lines)
    else:
        ensembl_genes = parse_ensembl_genes(ensembl_lines)

    for ensembl_gene in ensembl_genes:
        # A parsed gene may come without an hgnc id. Skip it instead of
        # letting a KeyError abort the update of all remaining genes
        if 'hgnc_id' not in ensembl_gene:
            LOG.debug(
                "Ensembl gene %s is missing hgnc id. Skipping",
                ensembl_gene.get('ensembl_gene_id'),
            )
            continue
        gene_obj = genes.get(ensembl_gene['hgnc_id'])
        if not gene_obj:
            continue
        gene_obj['chromosome'] = ensembl_gene['chrom']
        gene_obj['start'] = ensembl_gene['gene_start']
        gene_obj['end'] = ensembl_gene['gene_end']
        # ensembl ids can differ between builds. There is one stated in HGNC
        # that is true for build 38. So we add information from ensembl
        gene_obj['ensembl_gene_id'] = ensembl_gene['ensembl_gene_id']
Exemplo n.º 3
0
def add_ensembl_info(genes, ensembl_lines):
    """Add the coordinates from ensembl

    Every gene in ``genes`` that is matched by the hgnc id of a parsed ensembl
    gene is updated in place with the keys ``chromosome``, ``start``, ``end``
    and ``ensembl_gene_id``. Parsed ensembl genes without an hgnc id are
    skipped.

    Args:
        genes(dict): Dictionary with all genes, looked up by hgnc id
        ensembl_lines(iterable|DataFrame): Raw ensembl info. A DataFrame is
            parsed with parse_ensembl_gene_request, anything else with
            parse_ensembl_genes
    """

    LOG.info("Adding ensembl coordinates")
    # Parse and add the ensembl gene info
    if isinstance(ensembl_lines, DataFrame):
        ensembl_genes = parse_ensembl_gene_request(ensembl_lines)
    else:
        ensembl_genes = parse_ensembl_genes(ensembl_lines)

    for ensembl_gene in ensembl_genes:
        # A parsed gene may come without an hgnc id. Skip it instead of
        # letting a KeyError abort the update of all remaining genes
        if 'hgnc_id' not in ensembl_gene:
            LOG.debug(
                "Ensembl gene %s is missing hgnc id. Skipping",
                ensembl_gene.get('ensembl_gene_id'),
            )
            continue
        gene_obj = genes.get(ensembl_gene['hgnc_id'])
        if not gene_obj:
            continue
        gene_obj['chromosome'] = ensembl_gene['chrom']
        gene_obj['start'] = ensembl_gene['gene_start']
        gene_obj['end'] = ensembl_gene['gene_end']
        # ensembl ids can differ between builds. There is one stated in HGNC
        # that is true for build 38. So we add information from ensembl
        gene_obj['ensembl_gene_id'] = ensembl_gene['ensembl_gene_id']
Exemplo n.º 4
0
def exons(ctx, genes, build, exons, chromosome):
    """Print ensembl exons for the genes found in an ensembl gene file.

    The header line is always printed. An exon line is printed only when its
    "Gene stable ID" is one of the ensembl gene ids parsed from the gene file.
    """
    # The fetcher gets a one-element list, or the falsy value exactly as given
    chromosomes = [chromosome] if chromosome else chromosome

    # ensembl gene id -> hgnc id for every gene parsed from the gene file
    hgnc_by_ensembl_id = {}
    for parsed_gene in parse_ensembl_genes(genes):
        ensembl_id = parsed_gene["ensembl_gene_id"]
        hgnc_by_ensembl_id[ensembl_id] = parsed_gene["hgnc_id"]

    # Column names, taken from the first fetched line
    columns = None
    for raw_line in fetch_ensembl_exons(build=build, chromosomes=chromosomes):
        fields = raw_line.rstrip().split("\t")
        if columns is None:
            # First line is the header: remember the columns and pass it through
            columns = fields
            click.echo(raw_line)
            continue

        exon = dict(zip(columns, fields))
        if exon["Gene stable ID"] in hgnc_by_ensembl_id:
            click.echo(raw_line)