Exemplo n.º 1
0
 def trackDb(self):
     '''Return the RaFile for this composite's trackDb, creating it on first use.

     The object is cached on the instance as ``_trackDb``. When that
     attribute does not exist yet, a RaFile is constructed from
     ``self._trackDbPath`` and stored before being returned.
     '''
     try:
         cached = self._trackDb
     except AttributeError:
         # First access: nothing cached yet, so build and remember it.
         cached = self._trackDb = RaFile(self._trackDbPath)
     return cached
Exemplo n.º 2
0
    def __init__(self, filePath=None, handler=None, protocolPath=None):
        """Set up the exception handler and default paths, then read the file.

        filePath: path handed to self.read(). When None, the value returned
            by encodeUtils.defaultCvPath() is used instead.
        handler: stored as self.handler. When None, self.raiseException is
            used.
        protocolPath: stored as self.protocolPath. When None, it defaults to
            the user-expanded "~/htdocsExtras/ENCODE/" directory.
        """
        RaFile.__init__(self)

        self.handler = handler if handler is not None else self.raiseException

        if filePath is None:
            filePath = encodeUtils.defaultCvPath()

        # Assign the default (a comparison with '==' here would discard the
        # result and leave self.protocolPath as None).
        if protocolPath is None:
            protocolPath = os.path.expanduser("~/htdocsExtras/ENCODE/")
        self.protocolPath = protocolPath

        self.missingTypes = set()

        self.read(filePath)
Exemplo n.º 3
0
    def __init__(self, filePath=None, handler=None, protocolPath=None):
        '''Set up the exception handler and default paths, then read the file.

        filePath: path handed to self.read(). When None, the value returned
            by encodeUtils.defaultCvPath() is used instead.
        handler: stored as self.handler. When None, self.raiseException is
            used.
        protocolPath: stored as self.protocolPath. When None, it defaults to
            the user-expanded '~/htdocsExtras/ENCODE/' directory.
        '''
        RaFile.__init__(self)

        self.handler = handler if handler is not None else self.raiseException

        if filePath is None:
            filePath = encodeUtils.defaultCvPath()

        # Assign the default (a comparison with '==' here would discard the
        # result and leave self.protocolPath as None).
        if protocolPath is None:
            protocolPath = os.path.expanduser('~/htdocsExtras/ENCODE/')
        self.protocolPath = protocolPath

        self.missingTypes = set()

        self.read(filePath)
Exemplo n.º 4
0
    def checkMetaDbForFiles(self, status, state):
        """Match release files against metaDb stanzas and classify the results.

        status: label of the metaDb being checked; only used in error text.
        state: 'new' to use self.newMdb / self.newReleaseFiles, or 'old' to
            use self.oldMdb / self.oldReleaseFiles.

        Returns a tuple
        (filtermdb, revokedset, revokedfiles, atticset, supplementalset, errors):
            filtermdb       - RaFile holding every stanza whose 'fileName'
                              contains one of the wgEncode* release files
            revokedset      - names of stanzas whose 'objStatus' matches
                              revoked|replaced|renamed
            revokedfiles    - the release files belonging to those stanzas
            atticset        - names of stanzas that carry an 'attic' key
            supplementalset - release files whose name starts with
                              'supplemental'
            errors          - messages for files with no matching stanza

        Raises ValueError when state is neither 'new' nor 'old'.
        """
        if state == 'new':
            mdb, files = self.newMdb, self.newReleaseFiles
        elif state == 'old':
            mdb, files = self.oldMdb, self.oldReleaseFiles
        else:
            # Fail early and clearly instead of hitting unbound locals below.
            raise ValueError("state must be 'new' or 'old', got %r" % (state,))
        loose = self.loose

        errors = []
        revokedset = set()
        revokedfiles = set()
        atticset = set()
        supplementalset = set()
        filtermdb = RaFile()

        for i in files:
            if i.startswith('supplemental'):
                supplementalset.add(i)
            if not i.startswith('wgEncode'):
                continue

            # Escape the file name so that '.' and other regex
            # metacharacters in it are matched literally.
            pattern = ".*%s.*" % re.escape(i)
            filestanza = mdb.filter(
                lambda s, pattern=pattern: re.match(pattern, s['fileName']),
                lambda s: s)

            if not filestanza:
                # In loose mode, names matching '.*bai' that are missing from
                # the old metaDb are tolerated; anything else is an error.
                if not (loose and state == 'old' and re.match('.*bai', i)):
                    errors.append("metaDb: %s is not mentioned in %s" %
                                  (i, status))
                continue

            # Should only return one stanza, but handle several just in case.
            for j in filestanza:
                filtermdb[j.name] = j
                if 'objStatus' in j and re.search(
                        'revoked|replaced|renamed', j['objStatus']):
                    revokedfiles.add(i)
                    revokedset.add(j.name)
                if 'attic' in j:
                    atticset.add(j.name)

        return (filtermdb, revokedset, revokedfiles, atticset, supplementalset,
                errors)
import sys, string
import re
from ucscGb.gbData.ordereddict import OrderedDict
from ucscGb.gbData.ra.raFile import RaFile
from ucscGb.gbData import ucscUtils
import collections


rafile = RaFile("../data/cvJan15.ra")
treatment = 0  # NOTE(review): never read in the visible part of this script

# Print the "term" of every stanza whose "type" is "treatment".
for key in rafile.keys():
    thisstanza = rafile[key]

    if thisstanza["type"] == "treatment":
        # Parenthesised single-argument form prints the same text under
        # Python 2 and is valid syntax under Python 3.
        print(thisstanza["term"])
Exemplo n.º 6
0
Arquivo: mdb.py Projeto: bowhan/kent
 def __init__(self, filepath):
     '''Run the RaFile initialiser, then call self.read() on filepath.'''
     RaFile.__init__(self)
     self.read(filepath)
import sys, string
import re
from ucscGb.gbData.ordereddict import OrderedDict
from ucscGb.gbData.ra.raFile import RaFile
from ucscGb.gbData import ucscUtils
import collections


# Input .ra files (presumably the controlled vocabulary and the mouse
# metadata, judging by the file names).
rafile = RaFile('../data/cv.ra')
metafile = RaFile('../data/mouse_meta.ra')

# Scalar counters / starting ids.
cell_id = 1
cell_item_id = 4001
center_id = 2001
datatype = 0

# Empty containers; nothing in the visible part of the script fills them.
cells = {}
points = {}
antibodies = {}
exps = {}
exps2 = {}

# Name -> value lookup tables. "NHGRI-Elnitski" is the only entry whose
# value is a string rather than an integer.
centers = {
    "CRG-Guigo-m": 7,
    "CSHL-m": 7,
    "Caltech-m": 3,
    "FSU-m": 8,
    "NHGRI-Elnitski": "REMOVE",
    "PSU-m": 4,
    "Stanford-m": 5,
    "UW-m": 1,
    "Yale-m": 5,
    "LICR-m": 2,
}

control = {
    "Control_32bp": 161,
    "Control_36bp": 161,
    "Control_50bp": 161,
    "IgG-Yale": 165,
    "IgG-mus": 162,
    "IgG-rab": 163,
    "IgG-rat": 164,
    "Input": 161,
    "std": 166,
}

targets = {
    'H3ac': 105,
    'H3K27ac': 106,
    'H3K27me3': 107,
    'H3K36me3': 108,
    'H3K4me1': 109,
    'H3K4me2': 110,
    'H3K4me3': 111,
    'H3K79me2': 112,
    'H3K79me3': 113,
    'H3K9ac': 114,
    'H3K9me3': 115,
    'BHLHE40': 116,
    'CEBPB': 117,
    'CHD1': 118,
    'CHD2': 119,
    'CTCF': 120,
    'E2F4': 121,
    'EP300': 122,
    'ETS1': 123,
    'FLI1': 124,
    'FOSL1': 125,
    'GABPA': 126,
    'GATA1': 127,
    'GATA2': 128,
    'HCFC1': 129,
    'JUN': 130,
    'JUND': 131,
    'KAT2A': 132,
    'MAFK': 133,
    'MAX': 134,
    'MAZ': 135,
    'MXI1': 136,
    'MYB': 137,
    'MYC': 138,
    'MYOD1': 139,
    'MYOG': 140,
    'PAX5': 141,
    'POLR2A': 142,
    'RAD21': 143,
    'RCOR1': 144,
    'RDBP': 145,
    'REST': 146,
    'SIN3A': 147,
    'SMC3': 148,
    'SRF': 149,
    'TAL1': 150,
    'TBP': 151,
    'TCF12': 152,
    'TCF3': 153,
    'UBTF': 154,
    'USF1': 155,
    'USF2': 156,
    'ZC3H11A': 157,
    'ZKSCAN1': 158,
    'ZMIZ1': 159,
    'ZNF384': 160,
}


# Walk every stanza of the first .ra file.
for key in rafile.keys():
    thisstanza = rafile[key]
Exemplo n.º 8
0
    def __init__(self, args):
        """Gather release state for a composite and build the notes output.

        args: mapping that must provide 'releaseNew', 'releaseOld',
            'database', 'composite', 'loose', 'ignore', 'summary' and
            'specialMdb'; 'verbose' is optional and defaults to 0.

        releaseOld is reset to 'solo' when it is not a string of digits, is
        not positive, or is numerically greater than releaseNew. With
        releaseNew > 1 and releaseOld not 'solo' the new release is compared
        against the old one; with releaseOld == 'solo' only the new release
        is examined. Both paths finish by storing the collected messages in
        self.errors and the generated text in self.output (the report when
        there are no errors or self.ignore is set, the error listing
        otherwise).

        NOTE(review): when releaseNew <= 1 and releaseOld is not 'solo',
        neither path runs and self.errors / self.output are never set.

        Raises ValueError if releaseNew cannot be converted with int().
        """
        self.releaseNew = args['releaseNew']
        self.releaseOld = args['releaseOld']
        self.database = args['database']
        self.composite = args['composite']
        self.loose = args['loose']
        self.ignore = args['ignore']
        self.summary = args['summary']
        self.specialMdb = args['specialMdb']
        self.args = args
        if 'verbose' in args:
            self.verbose = args['verbose']
        else:
            self.verbose = 0

        errors = []
        c = track.CompositeTrack(self.database, self.composite, None,
                                 self.specialMdb)

        # Sanitize arguments: anything that is not a usable earlier release
        # number falls back to 'solo'.
        if not self.releaseOld.isdigit():
            self.releaseOld = 'solo'
        elif int(self.releaseOld) <= 0:
            self.releaseOld = 'solo'
        elif int(self.releaseOld) > int(self.releaseNew):
            # Compare numerically; as strings '10' would sort before '9'.
            self.releaseOld = 'solo'
        if self.verbose >= 1:
            sys.stderr.write("Initializing MkChangeNotes\n")
        self.releasePath = c.httpDownloadsPath + 'release' + args['releaseNew']
        self.gbdbPath = "/gbdb/%s/bbi" % args['database']
        self.trackDbFile = c.currentTrackDb
        if not self.trackDbFile:
            self.trackDb = None
            errors.append(
                "track: There is no entry in trackDb.wgEncode.ra for %s with the alpha tag"
                % self.composite)
        else:
            self.trackDb = RaFile(self.trackDbFile, "track")

        if int(self.releaseNew) > 1 and str(self.releaseOld) != 'solo':
            if self.verbose >= 2:
                sys.stderr.write("Comparison mode\n")
            self.newReleaseFiles = c.releases[int(self.releaseNew) - 1]
            self.oldReleaseFiles = c.releases[int(self.releaseOld) - 1]
            self.releasePathOld = c.httpDownloadsPath + 'release' + args[
                'releaseOld']

            self.newMdb = c.alphaMetaDb
            self.oldMdb = c.publicMetaDb

            if self.verbose >= 2:
                sys.stderr.write("Checking for missing files\n")
            # Make a list of missing files; they are filtered out of the old
            # release files further down.
            self.missingFiles = self.__checkFilesForDropped()

            if self.verbose >= 1:
                sys.stderr.write("Scanning and parsing release directories\n")
            # Check that all files listed in the release directories have
            # associated metaDb entries.
            (self.newMdb, self.revokedSet, self.revokedFiles, self.atticSet,
             self.newSupplementalSet,
             newFileErrors) = self.checkMetaDbForFiles("alpha metaDb", "new")
            (self.oldMdb, self.oldRevokedSet, self.oldRevokedFiles,
             self.oldAtticSet, self.oldSupplementalSet,
             oldFileErrors) = self.checkMetaDbForFiles("public metaDb", "old")

            self.expIds = set(
                self.newMdb.filter(lambda s: 'expId' in s,
                                   lambda s: s['expId']))

            if self.verbose >= 2:
                sys.stderr.write("Checking for attic files\n")
            # Check that attic files aren't in trackDb.
            if self.trackDb:
                errors.extend(self.__checkAtticNotInTrackDb())

            # Check that nothing has disappeared between public and alpha.
            if self.verbose >= 1:
                sys.stderr.write("Checking new metaDb for missing stanzas\n")
            errors.extend(self.__checkAlphaForDropped("alpha metaDb",
                                                      "stanza"))
            if self.verbose >= 1:
                sys.stderr.write("Checking file md5sums across releases\n")
            errors.extend(self.__checkMd5sums())

            # Check and collect the tables that are present; for the new
            # release this also returns the set of revoked tables.
            if self.verbose >= 1:
                sys.stderr.write("Checking table status\n")
            (self.newTableSet, self.revokedTableSet, self.newMissingTables,
             newTableError) = self.checkTableStatus("alpha metaDb", "new")
            (self.oldTableSet, spam, self.droppedTables,
             oldTableError) = self.checkTableStatus("public metaDb", "old")

            self.newInAttic = self.atticSet - self.oldAtticSet
            self.stillInAttic = self.oldAtticSet & self.atticSet
            self.oldTableSet = self.oldTableSet - self.atticSet
            self.noMoreAttic = self.oldAtticSet - self.atticSet

            self.changedTables = self.oldTableSet - self.newTableSet - self.revokedTableSet

            # Same as above, except for gbdbs.
            if self.verbose >= 1:
                sys.stderr.write("Checking GBDB status\n")
            (self.newGbdbSet, self.revokedGbdbs,
             newGbdbError) = self.getGbdbFiles("new")
            (self.oldGbdbSet, eggs, oldGbdbError) = self.getGbdbFiles("old")
            # Remove missing and attic files from the old gbdbs.
            self.oldGbdbSet = self.oldGbdbSet - self.missingFiles
            self.oldGbdbSet = self.oldGbdbSet - self.atticSet
            self.changedGbdbs = self.oldGbdbSet - self.newGbdbSet - self.revokedGbdbs
            for i in self.missingFiles:
                if i in self.oldReleaseFiles:
                    del self.oldReleaseFiles[i]

            # Fill in the errors.
            errors.extend(newFileErrors)
            errors.extend(oldFileErrors)
            errors.extend(newTableError)
            errors.extend(oldTableError)
            errors.extend(newGbdbError)
            errors.extend(oldGbdbError)

            if self.changedTables:
                errors.append(
                    "These tables were tables in the old release, but are no longer tables in the new release:"
                )
                errors.extend(list(self.changedTables))
            if self.changedGbdbs:
                errors.append(
                    "These GBDBs were GBDB tables in the old release, but are no longer GBDB tables in the new release:"
                )
                errors.extend(list(self.changedGbdbs))

            # For ease of typing.
            totalFiles = set(self.newReleaseFiles)
            oldTotalFiles = set(self.oldReleaseFiles)

            # These could be moved earlier into a file-list processing
            # section. They clean out the special files and separate the
            # master file list into the three required ones: wgEncode,
            # supplemental and additional.
            self.totalFiles = self.__cleanSpecialFiles(totalFiles)
            self.oldTotalFiles = self.__cleanSpecialFiles(oldTotalFiles)
            (self.oldTotalFiles, self.additionalList, self.oldAdditionalList,
             self.totalFiles) = self.__separateOutAdditional()

            # Get the stuff that needs to be pushed.
            self.pushTables = set(sorted(
                (self.newTableSet - self.oldTableSet)))
            self.pushFiles = set(sorted(
                (self.totalFiles - self.oldTotalFiles)))
            self.pushGbdbs = set(sorted((self.newGbdbSet - self.oldGbdbSet)))
            self.newSupp = self.newSupplementalSet - self.oldSupplementalSet

            self.newTables = set(self.pushTables)
            self.newFiles = set(
                ucscUtils.printIter(self.pushFiles, self.releasePath))
            self.newGbdbs = set(
                ucscUtils.printIter(self.pushGbdbs, self.gbdbPath))
            self.newSupplemental = set(
                ucscUtils.printIter(self.newSupp, self.releasePath))
            self.newOthers = set(
                ucscUtils.printIter(self.additionalList, self.releasePath))
            self.fullFiles = sorted(self.totalFiles - self.revokedFiles)
            self.fullTables = self.oldTableSet & self.newTableSet

            self.errors = errors
            # Don't output the report unless the ignore option is on or
            # there are no errors; module mode doesn't generate output by
            # default.
            if self.verbose >= 1:
                sys.stderr.write("Creating report\n")
            if (not errors) or self.ignore:
                self.output = self.printReport(args, c)
            else:
                self.output = self.printErrors(errors, self.missingFiles)

        elif self.releaseOld == 'solo':

            self.newReleaseFiles = c.releases[int(self.releaseNew) - 1]
            self.oldReleaseFiles = set()

            self.newMdb = c.alphaMetaDb

            # Check that attic files aren't in trackDb.
            if self.trackDb:
                errors.extend(self.__checkAtticNotInTrackDb())

            (self.newMdb, self.revokedSet, self.revokedFiles, self.atticSet,
             self.newSupplementalSet,
             newFileErrors) = self.checkMetaDbForFiles("alpha metaDb", "new")
            self.expIds = set(
                self.newMdb.filter(lambda s: 'expId' in s,
                                   lambda s: s['expId']))

            (self.newTableSet, self.revokedTableSet, spam,
             newTableError) = self.checkTableStatus("alpha metaDb", "new")

            self.tableSize = self.__getTableSize()

            (self.newGbdbSet, self.revokedGbdbs,
             newGbdbError) = self.getGbdbFiles("new")

            # Collect errors.
            errors.extend(newFileErrors)
            errors.extend(newTableError)
            errors.extend(newGbdbError)

            # Set for easy operations.
            totalFiles = set(self.newReleaseFiles)

            # Clean out special files we don't push, i.e. md5sum.history.
            self.totalFiles = self.__cleanSpecialFiles(totalFiles)

            self.pushTables = self.newTableSet
            self.pushFiles = self.totalFiles
            self.pushGbdbs = self.newGbdbSet
            self.newSupp = self.newSupplementalSet
            self.fullFiles = self.totalFiles
            self.fullTables = self.newTableSet

            # Make the lists for additional files.
            (self.oldTotalFiles, self.oldSupplementalSet) = (set(), set())
            (self.oldReleaseFiles, self.additionalList, self.oldAdditionalList,
             self.totalFiles) = self.__separateOutAdditional()
            self.errors = errors
            if (not errors) or self.ignore:
                self.output = self.printReportOne(args, c)
            else:
                self.droppedTables = set()
                self.output = self.printErrors(errors, set())
Exemplo n.º 9
0
 def __init__(self, filepath):
     '''Run the RaFile initialiser, then call self.read() on filepath.'''
     RaFile.__init__(self)
     self.read(filepath)