Exemplo n.º 1
0
def getProbeset2Location(database="hgu133plus2.db"):
    """Collect the genomic coordinates recorded for each probeset.

    The annotation package `database` is loaded into R and its
    ``CHRLENGTHS``, ``CHRLOC`` and ``CHRLOCEND`` objects are read. The
    object names are built by cutting the trailing ``len(".db")``
    characters off `database` and appending the object suffix.

    Returns a ``collections.defaultdict(list)`` keyed by probeset id;
    every value is a list of ``(contig, start, end)`` tuples. A probeset
    may have several entries, so the mapping is not necessarily unique.

    NOTE(review): a negative start presumably marks a reverse-strand
    location -- confirm against the annotation package documentation.
    For such rows both coordinates are replaced by
    ``contig length - coordinate``.
    """

    R.library(database)

    stem = database[:-len(".db")]
    chrom_sizes = dict(R(stem + "CHRLENGTHS"))

    # the R objects are Bimaps according to the original author;
    # toTable turns them into tables that can be indexed by column name
    start_table = R.toTable(R(stem + "CHRLOC"))
    end_table = R.toTable(R(stem + "CHRLOCEND"))

    # both tables are walked in lockstep below, so their rows must line up
    assert start_table["probe_id"] == end_table["probe_id"]

    locations = collections.defaultdict(list)
    rows = zip(start_table["probe_id"],
               start_table["Chromosome"],
               start_table["start_location"],
               end_table["end_location"])
    for probe, chrom, first, last in rows:
        if first < 0:
            size = chrom_sizes[chrom]
            first, last = size - first, size - last
        locations[probe].append((chrom, first, last))

    n_probes = len(set(start_table["probe_id"]))
    n_contigs = len(set(start_table["Chromosome"]))
    E.info("mappings: probes=%i, contigs=%i" % (n_probes, n_contigs))

    return locations
Exemplo n.º 2
0
def getProbeset2Location(database="hgu133plus2.db"):
    """Build a probeset -> genomic location lookup from an R annotation package.

    `database` is loaded with ``R.library``; the ``CHRLENGTHS``,
    ``CHRLOC`` and ``CHRLOCEND`` objects, whose names start with
    `database` minus its last ``len(".db")`` characters, supply contig
    lengths, start positions and end positions respectively.

    The result is a ``collections.defaultdict(list)`` whose keys are
    probeset ids and whose values are lists of ``(contig, start, end)``
    tuples. One probeset can appear with several locations, i.e. the
    mapping is not necessarily unique.

    NOTE(review): rows with a negative start are rewritten as
    ``contig length - coordinate`` for both start and end; presumably
    the sign encodes the strand -- verify before relying on it.
    """

    R.library(database)

    name_root = database[:-len(".db")]

    lengths = dict(R(name_root + "CHRLENGTHS"))

    # toTable is applied to the R objects (Bimaps, per the original note)
    loc_start = R.toTable(R(name_root + "CHRLOC"))
    loc_end = R.toTable(R(name_root + "CHRLOCEND"))

    probe2hits = collections.defaultdict(list)

    # start and end rows are paired by position, so the ids must agree
    assert loc_start["probe_id"] == loc_end["probe_id"]

    for probeset, chromosome, begin, finish in zip(
            loc_start["probe_id"],
            loc_start["Chromosome"],
            loc_start["start_location"],
            loc_end["end_location"]):

        if begin < 0:
            length = lengths[chromosome]
            hit = (chromosome, length - begin, length - finish)
        else:
            hit = (chromosome, begin, finish)

        probe2hits[probeset].append(hit)

    summary = (len(set(loc_start["probe_id"])),
               len(set(loc_start["Chromosome"])))
    E.info("mappings: probes=%i, contigs=%i" % summary)

    return probe2hits
Exemplo n.º 3
0
def getProbeset2Gene(database):
    """Return the ENSEMBL gene ids listed for each probeset.

    The R object named ``<database minus its last len(".db")
    characters>ENSEMBL`` is read from the annotation package `database`
    and flattened with ``toTable``.

    The result is a ``collections.defaultdict(list)`` mapping a
    probeset id to the list of gene ids found in the ``ensembl_id``
    column for it. Summary counts are logged through ``E.info``.
    """

    bimap_name = database[:-len(".db")] + "ENSEMBL"
    R.library(database)

    # the R object is a Bimap according to the original author
    table = R.toTable(R(bimap_name))

    probe2genes = collections.defaultdict(list)
    for probe, gene in zip(table["probe_id"], table["ensembl_id"]):
        probe2genes[probe].append(gene)

    counts = (len(table),
              len(set(table["probe_id"])),
              len(set(table["ensembl_id"])))
    E.info("obtained %i mappings: probes=%i, genes=%i" % counts)
    return probe2genes
Exemplo n.º 4
0
def getProbeset2Gene(database):
    """Relate every probeset to its ENSEMBL gene ids.

    Loads the R annotation package `database`, evaluates the object
    whose name is `database` without its final ``len(".db")`` characters
    followed by ``ENSEMBL``, and converts it with ``toTable``.

    Returns a ``collections.defaultdict(list)``: probeset id -> list of
    values from the table's ``ensembl_id`` column. The number of rows,
    distinct probesets and distinct genes is reported via ``E.info``.
    """

    stem = database[: -len(".db")]
    object_name = stem + "ENSEMBL"
    R.library(database)

    # original note: the evaluated R object is a Bimap
    bimap = R(object_name)
    rows = R.toTable(bimap)

    genes_by_probe = collections.defaultdict(list)
    pairs = zip(rows["probe_id"], rows["ensembl_id"])
    for probe_id, ensembl_id in pairs:
        genes_by_probe[probe_id].append(ensembl_id)

    n_rows = len(rows)
    n_probes = len(set(rows["probe_id"]))
    n_genes = len(set(rows["ensembl_id"]))
    E.info(
        "obtained %i mappings: probes=%i, genes=%i" % (n_rows, n_probes, n_genes)
    )
    return genes_by_probe