Пример #1
0
    def _generate(self, allAssays):
        """Load mouse (mm10) and human (hg19) epigenome metadata and register
        them for every assay set in *allAssays*.

        The `if 0` toggle selects between refreshing the metadata from the
        web service (and pickling it) and reading the pickled cache; only the
        cache branch currently runs.
        """
        if 0:
            # Refresh path (disabled): query the metadata web service.
            mouseWS = MetadataWS(Datasets.all_mouse)
            mm10epis = mouseWS.chipseq_tf_annotations_mm10(date="2016-05-01")

            humanWS = MetadataWS(Datasets.all_human)
            encodeHg19 = humanWS.chipseq_tf_annotations_hg19(date="2016-05-01")

            outFnp = "/data/projects/encode/encyclopedia_v3/webEpigenomesLoader.cpickle"
            with open(outFnp, 'wb') as f:
                # One Pickler for both dumps so the pickle memo is shared,
                # matching the paired Unpickler below.
                pickler = pickle.Pickler(f)
                pickler.dump(mm10epis)
                pickler.dump(encodeHg19)
            print("wrote", outFnp)
        else:
            # Cache path: read the two objects in the same order they were dumped.
            outFnp = os.path.join(os.path.dirname(__file__), "../../../webEpigenomesLoader.cpickle")
            #outFnp = "/data/projects/encode/encyclopedia_v3/webEpigenomesLoader.cpickle"
            with open(outFnp, 'rb') as f:
                unpickler = pickle.Unpickler(f)
                mm10epis = unpickler.load()
                encodeHg19 = unpickler.load()
            print("read", outFnp)

        for assays in allAssays:
            self._loadEpigenomes("mm10", mm10epis, assays)

        # hg19 is the union of the ENCODE experiments and the ROADMAP set.
        roadmapMetadata = RoadmapMetadata(self.histMark, self.assayType)
        hg19epis = Epigenomes("ROADMAP + ENCODE", "hg19")
        hg19epis.epis = encodeHg19.epis + roadmapMetadata.epigenomes.epis
        for assays in allAssays:
            self._loadEpigenomes("hg19", hg19epis, assays)
Пример #2
0
def outputSubTrack(priority, assembly, bt, btn, expIDs, fnp, idx, total,
                   biosample_type, biosample_term_name, lookupByExp):
    """Write the track lines for one biosample sub-track to *fnp*.

    Fetches the experiments for *expIDs*, builds a parent track named
    ``bt + '_' + btn`` (active iff any known experiment is active), adds one
    track per experiment with its ccREs, writes the lines, and returns the
    track subgroups.  *biosample_type* / *biosample_term_name* are accepted
    for signature compatibility but not used here.
    """
    mw = MetadataWS(host=Host)
    exps = mw.exps(expIDs)

    # The sub-track is "active" when any experiment we know about is active.
    # (was: a manual accumulator list fed into any(t for t in actives))
    isActive = any(lookupByExp[expID].isActive()
                   for expID in expIDs if expID in lookupByExp)
    if isActive:
        print("active biosample:", btn)

    parent = Parent(bt + '_' + btn, isActive)

    tracks = Tracks(assembly, parent, False)
    for exp in exps:
        # ccREs for this experiment when known; empty otherwise.
        # NOTE(review): the original also computed a per-exp isActive() into an
        # unused local; dropped as dead code (addExp was passed the literal True).
        cREs = {}
        if exp.encodeID in lookupByExp:
            cREs = lookupByExp[exp.encodeID].ccREs
        tracks.addExp(exp, True, cREs)

    Utils.ensureDir(fnp)
    with open(fnp, 'w') as f:
        f.writelines(tracks.lines(priority))
    printWroteNumLines(fnp, idx, 'of', total)
    return tracks.subgroups()
Пример #3
0
def main():
    """Fetch one experiment and print its hg19 bigWig files with bio replicates."""
    expID = 'ENCSR000BCA'  # earlier candidates: ENCSR000SKS, ENCSR000BCE, ENCSR000AEC
    if 1:
        # Live path: resolve the experiment through the metadata web service.
        ws = MetadataWS(host="http://192.168.1.46:9008/metadata")
        exp = ws.exps([expID])[0]
    else:
        # Offline path (disabled): load the experiment from a cached JSON file.
        exp = Exp.fromJsonFile(expID)

    bigWigs = bigWigFilters("hg19", exp)
    print("found", len(bigWigs))
    for bw in bigWigs:
        print(bw, bw.bio_rep)
Пример #4
0
def process(args, expID):
    # Normalize the primary bigWig of one ENCODE experiment (Python 2 code).
    #
    # Looks up the experiment, takes the first bigWig matching args.assembly,
    # and shells out to normBin unless the normalized file already exists.
    # Returns 0 on skip or successful run; returns None when no bigWig
    # matched, when the bigWig path is empty, or when an exception occurs.
    exp = MetadataWS.exp(expID)
    try:
        bigWigs = bigWigFilters(args.assembly, exp.files)
        if not bigWigs:
            return
        bigWig = bigWigs[0]  # only the first matching bigWig is processed
        bigWigFnp = bigWig.fnp()
        if os.path.exists(bigWig.normFnp()):
            # Normalized output already present -- nothing to do.
            print "skipping", exp
            return 0
        else:
            print "missing", bigWig.normFnp()
        bwAssembly = bigWig.assembly
        if not bigWigFnp:
            print exp.getSingleBigWigSingleFnp(args)
            print "missing", exp
        else:
            # Run the external normalizer; its stdout is printed for the log.
            cmds = [normBin,
                    "--assembly=" + bwAssembly,
                    "--bwFnp=" + bigWig.normFnp(),
                    bigWigFnp]
            print " ".join(cmds)
            print Utils.runCmds(cmds)
            return 0
    except Exception, e:
        # NOTE(review): broad catch -- failures are only printed, never re-raised.
        print "bad " + str(e)
Пример #5
0
def process(args, expID):
    # Normalize the primary bigWig of one ENCODE experiment (Python 2 code).
    # Near-duplicate of the process() defined earlier in this file; only the
    # cmds list formatting differs.
    #
    # Returns 0 on skip or successful run; None when no bigWig matched, when
    # the bigWig path is empty, or when an exception occurs.
    exp = MetadataWS.exp(expID)
    try:
        bigWigs = bigWigFilters(args.assembly, exp.files)
        if not bigWigs:
            return
        bigWig = bigWigs[0]  # only the first matching bigWig is processed
        bigWigFnp = bigWig.fnp()
        if os.path.exists(bigWig.normFnp()):
            # Normalized output already present -- nothing to do.
            print "skipping", exp
            return 0
        else:
            print "missing", bigWig.normFnp()
        bwAssembly = bigWig.assembly
        if not bigWigFnp:
            print exp.getSingleBigWigSingleFnp(args)
            print "missing", exp
        else:
            # Run the external normalizer; its stdout is printed for the log.
            cmds = [
                normBin, "--assembly=" + bwAssembly,
                "--bwFnp=" + bigWig.normFnp(), bigWigFnp
            ]
            print " ".join(cmds)
            print Utils.runCmds(cmds)
            return 0
    except Exception, e:
        # NOTE(review): broad catch -- failures are only printed, never re-raised.
        print "bad " + str(e)
    def __init__(self, args, assembly, globalData, priority):
        """Stash the run configuration and open a metadata web-service client
        for the dataset corresponding to *assembly*."""
        self.args = args
        self.assembly = assembly
        self.globalData = globalData
        self.priority = priority

        # Resolve the assembly-specific dataset, then connect to the service.
        self.mw = MetadataWS(dataset=Datasets.byAssembly(assembly), host=Host)
Пример #7
0
    def _doImport(self):
        """For each nine-state cell type, build the list of genome-browser
        track descriptors (bigWigs and bigBeds per experiment) and insert one
        row per cell type into Postgres.

        Python 2 code (``iteritems``; relies on ``filter`` usage that also
        works under py3 since the result is only iterated).
        """
        mc = None
        if Config.memcache:
            mc = MemCacheWrapper(Config.memcache)
        qd = QueryDCC(auth=False, cache=mc)  # DCC query helper, optionally memcached

        m = MetadataWS.byAssembly(self.assembly)
        allExps = m.all_bigBeds_bigWigs(self.assembly)
        printt("found", len(allExps))

        ret = {}
        ns = self.pgSearch.loadNineStateGenomeBrowser()
        total = len(ns)
        counter = 1
        for ctn, v in ns.iteritems():  # ctn: cell type name
            printt(counter, 'of', total, ctn)
            counter += 1
            # Collect the biosample_term_names behind this cell type's four
            # nine-state files; 'NA' marks an absent assay file.
            btns = set()
            for fileID in [v["dnase"], v["h3k4me3"], v["h3k27ac"], v["ctcf"]]:
                if 'NA' == fileID:
                    continue
                exp = qd.getExpFromFileID(fileID)
                btns.add(exp.biosample_term_name)

            # Keep every experiment whose biosample matches one of those names.
            exps = filter(lambda e: e.biosample_term_name in btns, allExps)
            ret[ctn] = []
            for e in exps:
                # One descriptor per experiment: its bigWig and bigBed files
                # with their technical replicates.
                q = {
                    "expID":
                    e.encodeID,
                    "assay_term_name":
                    e.assay_term_name,
                    "target":
                    e.target,
                    "tf":
                    e.tf,
                    "bigWigs": [{
                        "fileID": f.fileID,
                        "techRep": f.technical_replicates
                    } for f in e.files if f.isBigWig()],
                    "beds": [{
                        "fileID": f.fileID,
                        "techRep": f.technical_replicates
                    } for f in e.files if f.isBigBed()]
                }
                ret[ctn].append(q)

            # Stable ordering for the stored JSON payload.
            ret[ctn] = sorted(ret[ctn],
                              key=lambda q:
                              (q["assay_term_name"], q["target"], q["tf"]))
            # Parameterized insert; tableName is interpolated, values are bound.
            self.curs.execute(
                """
            INSERT INTO {tableName} (cellTypeName, tracks)
VALUES (%s, %s)""".format(tableName=self.tableName),
                (ctn, json.dumps(ret[ctn])))
    def makeTrackDb(self):
        """Return the static trackhub header followed by one generated stanza
        per GM12878 experiment, ordered by (assay, tf, lab)."""
        headerFnp = os.path.join(os.path.dirname(__file__),
                                 "..", "views", "interacting_gene", "trackhub.txt")
        with open(headerFnp) as fh:
            header = fh.read()

        ws = MetadataWS(Datasets.all_human)
        exps = ws.biosample_term_name("GM12878")
        orderedExps = sorted(exps, key=lambda e: (e.assay_term_name, e.tf, e.lab))

        # trackhubExp may yield falsy entries; those are skipped in the output.
        stanzas = [self.trackhubExp(e) for e in orderedExps]
        body = "".join(s + "\n" for s in stanzas if s)

        return header + "\n" + body
Пример #9
0
    def _generate(self, allAssays):
        """Load mouse (mm10) and human (hg19) epigenome metadata and register
        them for every assay set in *allAssays*.

        The `if 0` toggle chooses between refreshing from the web service
        (dumping a pickle cache) and reading that cache; only the read branch
        currently executes.
        """
        if 0:
            # Refresh path (disabled): fetch fresh annotations per species.
            mouseWS = MetadataWS(Datasets.all_mouse)
            mm10epis = mouseWS.chipseq_tf_annotations_mm10(date="2016-05-01")

            humanWS = MetadataWS(Datasets.all_human)
            encodeHg19 = humanWS.chipseq_tf_annotations_hg19(date="2016-05-01")

            outFnp = "/data/projects/encode/encyclopedia_v3/webEpigenomesLoader.cpickle"
            with open(outFnp, 'wb') as f:
                # One Pickler for both dumps (shared memo), paired with the
                # single Unpickler used on the read side.
                pickler = pickle.Pickler(f)
                pickler.dump(mm10epis)
                pickler.dump(encodeHg19)
            print("wrote", outFnp)
        else:
            # Cache path: load the two objects in dump order.
            outFnp = os.path.join(os.path.dirname(__file__),
                                  "../../../webEpigenomesLoader.cpickle")
            #outFnp = "/data/projects/encode/encyclopedia_v3/webEpigenomesLoader.cpickle"
            with open(outFnp, 'rb') as f:
                unpickler = pickle.Unpickler(f)
                mm10epis = unpickler.load()
                encodeHg19 = unpickler.load()
            print("read", outFnp)

        for assays in allAssays:
            self._loadEpigenomes("mm10", mm10epis, assays)

        # hg19 combines the ENCODE experiments with the ROADMAP set.
        roadmapMetadata = RoadmapMetadata(self.histMark, self.assayType)
        hg19epis = Epigenomes("ROADMAP + ENCODE", "hg19")
        hg19epis.epis = encodeHg19.epis + roadmapMetadata.epigenomes.epis
        for assays in allAssays:
            self._loadEpigenomes("hg19", hg19epis, assays)
Пример #10
0
    def makeTrackDb(self):
        """Return the static trackhub header plus one generated stanza per
        GM12878 experiment, sorted by (assay, tf, lab)."""
        headerFnp = os.path.join(os.path.dirname(__file__), "..", "views",
                                 "interacting_gene", "trackhub.txt")
        with open(headerFnp) as fh:
            header = fh.read()

        ws = MetadataWS(Datasets.all_human)
        gmExps = ws.biosample_term_name("GM12878")

        # One stanza per experiment, in a stable, human-sensible order.
        stanzas = [self.trackhubExp(e)
                   for e in sorted(gmExps,
                                   key=lambda e: (e.assay_term_name, e.tf, e.lab))]
        # Falsy stanzas are dropped, matching the original write loop.
        body = "".join(s + "\n" for s in stanzas if s)

        return header + "\n" + body
Пример #11
0
    def makeJobs(self):
        """Build the shuffled per-experiment job list for this assembly.

        Gathers useful TF and histone ChIP-seq experiments, drops those with
        an ERROR audit, and pairs each surviving experiment with its useful
        peak file.  Python 2 code (``except Exception, e``).
        """
        m = MetadataWS(Datasets.byAssembly(self.assembly))

        # Two experiment pools, tagged with their type for downstream use.
        allExps = [(m.chipseq_tfs_useful(), "tf"),
                   (m.chipseq_histones_useful(), "histone")]
        allExpsIndiv = []
        for exps, etype in allExps:
            printt("found", len(exps), etype)
            # Re-load each experiment from its JSON to get full audit data.
            exps = [Exp.fromJsonFile(e.encodeID) for e in exps]
            exps = filter(lambda e: "ERROR" not in e.jsondata["audit"], exps)
            printt("found", len(exps), etype,
                   "after removing ERROR audit exps")
            for exp in exps:
                allExpsIndiv.append((exp, etype))
        # Shuffle so long-running experiments spread across workers.
        random.shuffle(allExpsIndiv)
        total = len(allExpsIndiv)

        i = 0
        jobs = []
        for exp, etype in allExpsIndiv:
            i += 1
            try:
                bed = exp.getUsefulPeakFile(self.assembly)
                if not bed:
                    printt("missing", exp)
                # NOTE(review): a falsy bed is reported above but the job is
                # still appended with bed=None -- confirm downstream handles it.
                jobs.append({
                    "exp": exp,  # this is an Exp
                    "bed": bed,  # this is an ExpFile
                    "i": i,
                    "total": total,
                    "assembly": self.assembly,
                    "etype": etype
                })
            except Exception, e:
                printt(str(e))
                printt("bad exp:", exp)
Пример #12
0
    def __init__(self, args, siteInfo):
        """Assemble the hg19 "interacting gene" epigenomes and index them by
        (assembly, assays) for the web layer.

        *siteInfo* is accepted but not used here.
        """
        self.args = args
        self.ontology = Ontology()

        # NOTE(review): this client is never read afterwards; kept in case the
        # constructor has side effects -- confirm before removing.
        m = MetadataWS(Datasets.all_mouse)

        combined = Epigenomes("ROADMAP + ENCODE", "hg19")
        combined.epis = InteractingGeneMetadata().epigenomes.epis
        byAssembly = {"hg19": combined}

        # Two-level lookup: assembly -> assays -> WebEpigenomes.
        self.byAssemblyAssays = defaultdict(lambda: defaultdict(None))
        for assembly in ["hg19"]:
            for assays in ["InteractingGene"]:
                rawEpis = byAssembly[assembly].GetByAssays(assays)
                if not rawEpis:
                    continue
                webEpis = [WebEpigenome(self.args, epi, assays, self.ontology)
                           for epi in rawEpis]
                self.byAssemblyAssays[assembly][assays] = WebEpigenomes(
                    self.args, assembly, assays, webEpis)