def getProbeset2Location(database="hgu133plus2.db"):
    '''Map every probeset to the list of its genomic locations.

    Loads the R annotation package *database* and reads the objects
    ``<prefix>CHRLENGTHS``, ``<prefix>CHRLOC`` and ``<prefix>CHRLOCEND``
    from it, where ``<prefix>`` is *database* without its last three
    characters (the length of ``".db"``).

    A probeset can occur several times in the location tables, so the
    mapping is not necessarily unique: each value is a list.

    Asserts that the ``probe_id`` columns of the start and end tables
    are equal, and logs the number of distinct probes and contigs
    through ``E.info``.

    Returns a ``collections.defaultdict(list)`` keyed by probeset id
    whose values are lists of ``(contig, start, end)`` tuples.
    '''
    R.library(database)
    stem = database[:-len(".db")]

    contig_sizes = dict(R(stem + "CHRLENGTHS"))

    # the annotation maps are Bimap objects; flatten them into tables
    start_table = R.toTable(R(stem + "CHRLOC"))
    end_table = R.toTable(R(stem + "CHRLOCEND"))

    locations = collections.defaultdict(list)

    # both tables must list the probes in the same order, otherwise the
    # start/end pairing done by zip() below would be meaningless
    assert start_table["probe_id"] == end_table["probe_id"]

    rows = zip(start_table["probe_id"],
               start_table["Chromosome"],
               start_table["start_location"],
               end_table["end_location"])

    for probe, chrom, lo, hi in rows:
        if lo < 0:
            # NOTE(review): lo is negative in this branch, so subtracting
            # it adds its magnitude to the contig length - confirm this is
            # the intended coordinate transformation.
            lo = contig_sizes[chrom] - lo
            hi = contig_sizes[chrom] - hi
        locations[probe].append((chrom, lo, hi))

    n_probes = len(set(start_table["probe_id"]))
    n_contigs = len(set(start_table["Chromosome"]))
    E.info("mappings: probes=%i, contigs=%i" % (n_probes, n_contigs))

    return locations
def getProbeset2Location(database="hgu133plus2.db"):
    '''Map every probeset to the list of its genomic locations.

    Loads the R annotation package *database* and reads the objects
    ``<prefix>CHRLENGTHS``, ``<prefix>CHRLOC`` and ``<prefix>CHRLOCEND``
    from it, where ``<prefix>`` is *database* without its last three
    characters (the length of ``".db"``).

    A probeset can occur several times in the location tables, so the
    mapping is not necessarily unique: each value is a list.

    Asserts that the ``probe_id`` columns of the start and end tables
    are equal, and logs the number of distinct probes and contigs
    through ``E.info``.

    Returns a ``collections.defaultdict(list)`` keyed by probeset id
    whose values are lists of ``(contig, start, end)`` tuples.
    '''
    R.library(database)
    stem = database[:-len(".db")]

    contig_sizes = dict(R(stem + "CHRLENGTHS"))

    # the annotation maps are Bimap objects; flatten them into tables
    start_table = R.toTable(R(stem + "CHRLOC"))
    end_table = R.toTable(R(stem + "CHRLOCEND"))

    locations = collections.defaultdict(list)

    # both tables must list the probes in the same order, otherwise the
    # start/end pairing done by zip() below would be meaningless
    assert start_table["probe_id"] == end_table["probe_id"]

    rows = zip(start_table["probe_id"],
               start_table["Chromosome"],
               start_table["start_location"],
               end_table["end_location"])

    for probe, chrom, lo, hi in rows:
        if lo < 0:
            # NOTE(review): lo is negative in this branch, so subtracting
            # it adds its magnitude to the contig length - confirm this is
            # the intended coordinate transformation.
            lo = contig_sizes[chrom] - lo
            hi = contig_sizes[chrom] - hi
        locations[probe].append((chrom, lo, hi))

    n_probes = len(set(start_table["probe_id"]))
    n_contigs = len(set(start_table["Chromosome"]))
    E.info("mappings: probes=%i, contigs=%i" % (n_probes, n_contigs))

    return locations
def getProbeset2Gene(database):
    '''Build a map relating each probeset to its ENSEMBL gene_ids.

    Loads the R annotation package *database* and reads the object
    ``<prefix>ENSEMBL`` from it, where ``<prefix>`` is *database*
    without its last three characters (the length of ``".db"``).

    A probeset can be listed with several genes, so each value is a
    list. The number of mappings, distinct probes and distinct genes
    is logged through ``E.info``.

    Returns a ``collections.defaultdict(list)`` keyed by probeset id
    whose values are lists of ENSEMBL gene ids.
    '''
    prefix = database[:-len(".db")]

    R.library(database)

    # the annotation map is a Bimap object; flatten it into a table
    table = R.toTable(R(prefix + "ENSEMBL"))
    probe_ids = table["probe_id"]
    gene_ids = table["ensembl_id"]

    mapping = collections.defaultdict(list)
    for probeset_id, gene_id in zip(probe_ids, gene_ids):
        mapping[probeset_id].append(gene_id)

    # The table is indexed by column name, so len(table) is not a
    # reliable row count (for a column-keyed table it is the number of
    # columns). Count the mappings from a column instead.
    E.info("obtained %i mappings: probes=%i, genes=%i" %
           (len(probe_ids), len(set(probe_ids)), len(set(gene_ids))))

    return mapping
def getProbeset2Gene(database):
    """Build a map relating each probeset to its ENSEMBL gene_ids.

    Loads the R annotation package *database* and reads the object
    ``<prefix>ENSEMBL`` from it, where ``<prefix>`` is *database*
    without its last three characters (the length of ``".db"``).

    A probeset can be listed with several genes, so each value is a
    list. The number of mappings, distinct probes and distinct genes
    is logged through ``E.info``.

    Returns a ``collections.defaultdict(list)`` keyed by probeset id
    whose values are lists of ENSEMBL gene ids.
    """
    prefix = database[: -len(".db")]

    R.library(database)

    # the annotation map is a Bimap object; flatten it into a table
    table = R.toTable(R(prefix + "ENSEMBL"))
    probe_ids = table["probe_id"]
    gene_ids = table["ensembl_id"]

    mapping = collections.defaultdict(list)
    for probeset_id, gene_id in zip(probe_ids, gene_ids):
        mapping[probeset_id].append(gene_id)

    # The table is indexed by column name, so len(table) is not a
    # reliable row count (for a column-keyed table it is the number of
    # columns). Count the mappings from a column instead.
    E.info(
        "obtained %i mappings: probes=%i, genes=%i"
        % (len(probe_ids), len(set(probe_ids)), len(set(gene_ids)))
    )

    return mapping