Exemplo n.º 1
0
    # if we have an expression file we need fc and expression columns
    if exprfile != None:
        assert fccol != None
        assert exprcols != None
    else:
        exprcols = []

    assert outputfile != None

    genedata = EnsemblGenes(assembly=assembly)

    genome = Genome(genomeBuild=assembly)

    if assembly == "hg18":
        cpgIslands = ExtendedBed(
            os.path.expanduser(
                "~/mount/publicdata/hg18/cpgislands/cpgislands.bed"))
        lINEs = ExtendedBed(
            os.path.expanduser("~/mount/publicdata/hg18/repeats/LINEs-0.bed"))
        sINEs = ExtendedBed(
            os.path.expanduser("~/mount/publicdata/hg18/repeats/SINEs-0.bed"))
    elif assembly == "hg19":
        cpgIslands = ExtendedBed(
            os.path.expanduser(
                "~/mount/publicdata/hg19/CpGIslands/cpgislands.bed"))
        lINEs = ExtendedBed(
            os.path.expanduser(
                "~/mount/publicdata/hg19/Repeats/UCSC_HG19_LINEs.bed"),
            defaultkeys=["chrom", "chromStart", "chromEnd", "name", "strand"])
        sINEs = ExtendedBed(
            os.path.expanduser(
Exemplo n.º 2
0
# Lookup structures built from the Ensembl gene data (genedata).
# Genes, with the TSS padded by UPSTREAM_PROMOTOR_DIST.
genespluspromotor = Ensembl.ReverseGeneMapping(
    genedata,
    tssPadding=UPSTREAM_PROMOTOR_DIST)

# Promotor regions, padded on both sides by the module-level distances.
genepromotors = Ensembl.ReversePromotorMapping(
    genedata,
    downstreamPadding=DOWNSTREAM_PROMOTOR_DIST,
    upstreamPadding=UPSTREAM_PROMOTOR_DIST)

exons = Ensembl.ReverseExonMapping(genedata)
transcriptionSites = Ensembl.TranscriptionSites(genedata)

# CpG islands for the selected assembly.
# Source: UCSC table browser - Expression & Regulation - CpG Islands,
# downloaded with every column except "bin".
cpgIslands = ExtendedBed(os.path.expanduser(
    "/mnt/50tb/publicdata/" + assembly + "/CpGIslands/cpgislands.bed"))

# Chromosome bands for the selected assembly.
# Source: UCSC table browser - Mapping and Sequencing - Chromosome Bands,
# downloaded with every column except "gieStain".
gBanding = ExtendedBed(
    os.path.expanduser(
        "/mnt/50tb/publicdata/" + assembly + "/G-Banding/cytogenetic.map.bed"),
    forcekeys=True,
    defaultkeys=["chrm", "start", "stop", "band"])

chromosomeEnds = ChromosomeEnds(assembly)

###
###
###
Exemplo n.º 3
0
# Reads a tab-delimited file of intervals (-i) and writes a tab-delimited
# report about them (-o).
for o, a in opts:
    if o == "-o":
        outfile = a
    elif o == "-i":
        infile = a

# Promotor window sizes, upstream and downstream.
promotorUp = promotorDown = 2000

# Both handles stay open; the reader and writer are module-level objects.
intervals = csv.reader(open(infile, "r"), delimiter="\t")
writer = csv.writer(open(outfile, "w"), delimiter="\t")

cpgIslands = ExtendedBed(os.path.expanduser(
    "~/mount/publicdata/hg18/cpgislands/cpgislands-0-index.bed"))

genome = Genome()

# Gene annotation: hg18 assembly with the ncbi36.1 annotation set.
genedata = Ensembl.EnsemblGenes(annotation="ncbi36.1", assembly="hg18")

# Leading columns of the output header.
headerRow = [
    "Ensembl",
    "Name",
    "chr",
    "start",
    "stop",
    "strand",
    "No. Transcripts",
    "Avg. Exons per Transcript",
    "Unique Exons per Gene",
    "Start positions",
    "Start positions / No. Transcripts",
]

#"Promotor G-Count",  "Promotor C-Count",  "Promotor A-Count", "Promotor T-Count" ,
headerRow.extend([
Exemplo n.º 4
0
from bed.treatment import ExtendedBed
from sam.SamFormat import SAMFile
import os
import sys

# LAD regions come from a fixed bed file; the SAM alignments to check are
# given as the first command-line argument.
lads = ExtendedBed(
    os.path.expanduser(
        "~/mount/privatedata/non-Adams/donahue.greg/LADs.bed"))
alignments = SAMFile(os.path.expanduser(sys.argv[1]))

# Running tallies for the key currently being processed.
numbInLAD = numbNotInLAD = 0

# Key of the entry group being tallied; None until the first entry is seen.
currentSeq = None


def previousKey(key, isin, isnotin):
    """Print one summary line for a key.

    The line holds, separated by single spaces: the key, the ``isin`` count,
    the ``isnotin`` count, and the fraction ``isin / (isin + isnotin)``.

    Args:
        key: Identifier the counts belong to.
        isin: Number of entries counted as "in".
        isnotin: Number of entries counted as "not in".

    When both counts are zero the fraction is undefined and is printed as
    ``nan`` instead of raising ``ZeroDivisionError``.
    """
    total = isin + isnotin
    fraction = isin / float(total) if total else float("nan")
    # A single pre-formatted string prints the same space-separated line
    # under the Python 2 print statement and the Python 3 print() function.
    print("%s %s %s %s" % (key, isin, isnotin, fraction))


for samEntry in alignments:

    if samEntry.chrm == "*":
        continue

    if currentSeq != None and samEntry.key != currentSeq:
        # print the stats on the previous key
        previousKey(currentSeq, numbInLAD, numbNotInLAD)
        numbInLAD = 0
        numbNotInLAD = 0