def _storeOtherDependentAttrs(self):
    """Cache the iterator-derived properties on this object.

    The GEDependentAttributesHolder implementation runs first. Each value
    reported by self._geIter is then stored in the matching private
    attribute, and finally the class of self._geSource is rebound to
    GtrackGenomeElementSource.
    """
    GEDependentAttributesHolder._storeOtherDependentAttrs(self)

    # (attribute to set, name of the self._geIter method supplying it),
    # listed in the order in which the iterator is queried.
    iterDerivedAttrs = (
        ('_isSorted', 'isSorted'),
        ('_hasCircularElements', 'hasCircularElements'),
        ('_hasNoOverlappingElements', 'hasNoOverlappingElements'),
        ('_hasUndirectedEdges', 'hasUndirectedEdges'),
        ('_valDataType', 'getValDataType'),
        ('_edgeWeightDataType', 'getEdgeWeightDataType'),
    )
    for attrName, getterName in iterDerivedAttrs:
        setattr(self, attrName, getattr(self._geIter, getterName)())

    # Rebinding __class__ changes the type of the existing source object
    # in place rather than constructing a new one.
    self._geSource.__class__ = GtrackGenomeElementSource
示例#2
0
    def _storeOtherDependentAttrs(self):
        """Record the properties reported by self._geIter as private attributes.

        Calls the GEDependentAttributesHolder implementation, copies six
        values from the element iterator onto self, and then switches the
        class of self._geSource to GtrackGenomeElementSource.
        """
        GEDependentAttributesHolder._storeOtherDependentAttrs(self)

        # Pairs of (iterator method name, attribute receiving its result);
        # the iterator is queried in exactly this order.
        for getterName, targetAttr in [
                ('isSorted', '_isSorted'),
                ('hasCircularElements', '_hasCircularElements'),
                ('hasNoOverlappingElements', '_hasNoOverlappingElements'),
                ('hasUndirectedEdges', '_hasUndirectedEdges'),
                ('getValDataType', '_valDataType'),
                ('getEdgeWeightDataType', '_edgeWeightDataType')]:
            queryIter = getattr(self._geIter, getterName)
            setattr(self, targetAttr, queryIter())

        # The wrapped source object is retyped in place.
        self._geSource.__class__ = GtrackGenomeElementSource
    def __init__(self, geSource):
        """Store the genome element source and note whether it is empty.

        geSource is wrapped in a GEDependentAttributesHolder unless it
        already reports bounding region tuples. If that query (or the
        wrapping) raises NotIteratedYetError, geSource is used as-is.

        self._emptyGeSource is set to True when parseFirstDataLine() raises
        an ordinary exception and to False otherwise.
        """
        try:
            if not geSource.hasBoundingRegionTuples():
                self._geSource = GEDependentAttributesHolder(geSource)
            else:
                self._geSource = geSource
        except NotIteratedYetError:
            self._geSource = geSource

        try:
            self._geSource.parseFirstDataLine()
        except Exception:
            # Any ordinary failure to parse a first data line is treated as
            # "no elements". KeyboardInterrupt and SystemExit are not
            # subclasses of Exception, so they are no longer swallowed.
            self._emptyGeSource = True
        else:
            self._emptyGeSource = False
示例#4
0
    def _commonTestComposer(self, withTrackGESource, composerCls, suffix):
        """Prepare the composer input for every test case selected by suffix.

        withTrackGESource -- when true, self.GENOME is used as genome, the
            case source is iterated, the track is preprocessed and a
            TrackGenomeElementSource is built as composer input.
        composerCls -- not referenced in the lines of this method shown here.
        suffix -- selects the cases to run: the case name must equal suffix
            or start with suffix followed by '_' or '.'.

        NOTE(review): this copy of the method stops after inputGESource has
        been built for the withTrackGESource branch; the composing and
        checking steps are presumably missing from this excerpt -- confirm.
        """
        geSourceTest = self._commonSetup()

        for caseName in geSourceTest.cases:
            # Skip cases that are neither named exactly `suffix` nor start
            # with `suffix` followed by '_' or '.'.
            if not (caseName == suffix or \
                    (caseName.startswith(suffix) and caseName[len(suffix)] in ['_','.'])):
                continue

            # `and` binds tighter than `or`: 'no_print' cases are always
            # skipped, the other two markers only when withTrackGESource.
            if 'no_print' in caseName or \
                withTrackGESource and ('no_track_extract' in caseName or \
                                       caseName.endswith('_no_hb')):
                print 'Test case skipped: ' + caseName
                continue

            print caseName
            case = geSourceTest.cases[caseName]

            testFn = self._writeTestFile(case)
            # Fall back to GenomeElementSource when the case names no class.
            sourceClass = case.sourceClass if case.sourceClass is not None else GenomeElementSource
            genome = self.GENOME if withTrackGESource else case.genome

            rawCaseGESource = sourceClass(testFn, genome, printWarnings=False)
            caseGESource = GEDependentAttributesHolder(rawCaseGESource)
            #actualSourceClass = caseGESource._geSource.__class__

            if withTrackGESource:
                # Run through the whole source before asking for bounding
                # regions (presumably required by the holder -- confirm).
                for x in caseGESource:
                    pass

                boundingRegionTuples = caseGESource.getBoundingRegionTuples()
                boundingRegions = [br.region for br in boundingRegionTuples]
                # With no bounding regions, or only regions whose chr is
                # None, use a GlobalBinSource for the test genome instead.
                if boundingRegions == [] or all(br.chr is None
                                                for br in boundingRegions):
                    boundingRegions = GlobalBinSource(self.GENOME)

                trackName = self.TRACK_NAME_PREFIX + case.trackName
                self._preProcess(trackName)

                # Overlaps are allowed only for cases with a 'start' prefix
                # whose name does not end in '_compose_no_overlaps'.
                allowOverlaps = True if (
                    'start' in case.prefixList
                ) and not caseName.endswith('_compose_no_overlaps') else False
                inputGESource = TrackGenomeElementSource(self.GENOME, trackName, boundingRegions, \
                                                         printWarnings=False, allowOverlaps=allowOverlaps)
    def testSorting(self):
        """Sort every 'gtrack' test case and compare with the expected lists.

        Cases whose name does not start with 'gtrack', or contains
        'no_sort', are skipped. For each remaining case the test file is
        written, sorted with sortGtrackFileAndReturnContents, and parsed
        again from the sorted contents. Elements are compared with
        sorted(case.assertElementList) unless the track format reports a
        dense representation; bounding regions are always compared with
        sorted(case.boundingRegionsAssertList).
        """
        geSourceTest = self._commonSetup()

        for caseName in geSourceTest.cases:
            if not caseName.startswith('gtrack'):
                continue

            if 'no_sort' in caseName:
                print('Test case skipped: ' + caseName)
                continue

            print(caseName)
            print('')

            case = geSourceTest.cases[caseName]
            testFn = self._writeTestFile(case)
            # Close the handle deterministically instead of leaving the
            # open file to the garbage collector.
            with open(testFn) as testFile:
                print(testFile.read())
            print('')

            sortedContents = sortGtrackFileAndReturnContents(testFn, case.genome)
            print(sortedContents)

            # Cases without an explicit source class use the generic
            # GenomeElementSource, constructed with forPreProcessor=True.
            usesDefaultSource = case.sourceClass is None
            sourceClass = GenomeElementSource if usesDefaultSource else case.sourceClass
            sortedGeSource = GEDependentAttributesHolder(
                sourceClass('sortedFile.gtrack', case.genome,
                            forPreProcessor=usesDefaultSource,
                            printWarnings=False,
                            strToUseInsteadOfFn=sortedContents))

            reprIsDense = TrackFormat.createInstanceFromGeSource(sortedGeSource).reprIsDense()

            if not reprIsDense:
                self.assertEquals(sorted(case.assertElementList), [ge for ge in sortedGeSource])
            else:
                # Elements are not compared for dense formats, but the
                # source is still run through before the bounding regions
                # are requested.
                for ge in sortedGeSource:
                    pass

            self.assertEquals(sorted(case.boundingRegionsAssertList),
                              [br for br in sortedGeSource.getBoundingRegionTuples()])
示例#6
0
                inputGESource = TrackGenomeElementSource(self.GENOME, trackName, boundingRegions, \
                                                         printWarnings=False, allowOverlaps=allowOverlaps)
            else:
                inputGESource = rawCaseGESource

            composer = composerCls(inputGESource)
            contents = composer.returnComposed()
            print contents

            composedFile = NamedTemporaryFile('w', suffix='.' + suffix)
            composedFile.write(contents)
            composedFile.flush()

            #print actualSourceClass.__name__

            outputGESource = GEDependentAttributesHolder(
                sourceClass(composedFile.name, genome, printWarnings=False))

            if 'no_check_print' in caseName or withTrackGESource and 'no_check_track_extract' in caseName:
                print 'No checks for case: ' + caseName
            else:
                caseGEs = [ge.getCopy() for ge in caseGESource]
                outputGEs = [ge.getCopy() for ge in outputGESource]
                isSortableGE = any(
                    getattr(caseGEs[0], x) is not None
                    for x in ['start', 'end']) if len(caseGEs) > 0 else False
                if withTrackGESource and isSortableGE and not caseGESource.hasBoundingRegionTuples(
                ):
                    caseGEs = sorted(caseGEs)
                self.assertGenomeElementLists(caseGEs, outputGEs)
                self.assertListsOrDicts(
                    caseGESource.getBoundingRegionTuples(),
示例#7
0
 def _decorateGESource(self, geSource):
     """Return geSource wrapped in a GEDependentAttributesHolder."""
     decorated = GEDependentAttributesHolder(geSource)
     return decorated
class FileFormatComposer(object):
    """Base class for writing a genome element source in a file format.

    Subclasses override _compose() to do the actual writing, and set
    FILE_SUFFIXES (the first entry is the default suffix) and
    FILE_FORMAT_NAME.
    """

    FILE_SUFFIXES = ['']
    FILE_FORMAT_NAME = ''

    def __init__(self, geSource):
        """Store the genome element source and note whether it is empty.

        geSource is wrapped in a GEDependentAttributesHolder unless it
        already reports bounding region tuples. If that query (or the
        wrapping) raises NotIteratedYetError, geSource is used as-is.

        self._emptyGeSource is set to True when parseFirstDataLine() raises
        an ordinary exception and to False otherwise.
        """
        try:
            if not geSource.hasBoundingRegionTuples():
                self._geSource = GEDependentAttributesHolder(geSource)
            else:
                self._geSource = geSource
        except NotIteratedYetError:
            self._geSource = geSource

        try:
            self._geSource.parseFirstDataLine()
        except Exception:
            # Any ordinary failure to parse a first data line is treated as
            # "no elements". KeyboardInterrupt and SystemExit are not
            # subclasses of Exception, so they are no longer swallowed.
            self._emptyGeSource = True
        else:
            self._emptyGeSource = False

    @staticmethod
    def matchesTrackFormat(trackFormat):
        """Return a MatchResult with match=False for the given track format.

        The result carries trackFormat.getFormatName() as trackFormatName.
        """
        return MatchResult(match=False,
                           trackFormatName=trackFormat.getFormatName())

    def composeToFile(self, fn, ignoreEmpty=False, **kwArgs):
        """Compose into the file named fn and return whether output was written.

        The path is ensured to exist and the file is opened for writing
        before the emptiness check, so fn is created (or truncated) even
        when composing is skipped. Returns the result of _composeCommon().
        """
        ensurePathExists(fn)
        # The with-block closes the file even if composing raises.
        with open(fn, 'w') as f:
            return self._composeCommon(f, ignoreEmpty, **kwArgs)

    def returnComposed(self, ignoreEmpty=False, **kwArgs):
        """Compose into memory and return the contents as a string.

        The string is empty when composing is skipped because ignoreEmpty
        is set and the source is empty.
        """
        memFile = StringIO()
        self._composeCommon(memFile, ignoreEmpty, **kwArgs)
        return memFile.getvalue()

    def _composeCommon(self, out, ignoreEmpty=False, **kwArgs):
        """Call _compose(out, **kwArgs) unless an empty source is to be ignored.

        Returns False without writing when ignoreEmpty is set and the
        source was found empty at construction; True otherwise.
        """
        if ignoreEmpty and self._emptyGeSource:
            return False

        self._compose(out, **kwArgs)
        return True

    def _compose(self, out, **kwArgs):
        """Write the composed output to out; must be overridden.

        Raises AbstractClassError in this base class.
        """
        raise AbstractClassError()

    def _commonFormatNumberVal(self, val):
        """Return val formatted as a string, or '.' for None or NaN.

        Numbers are formatted with the '%#.<OUTPUT_PRECISION>g' format.
        """
        # None is tested first so that isNan() is never handed a
        # non-numeric value.
        if val is None or isNan(val):
            return '.'
        return ('%#.' + str(OUTPUT_PRECISION) + 'g') % val

    def _commonFormatBinaryVal(self, val):
        """Return '.' for BINARY_MISSING_VAL, else 1 if val == True, else 0.

        Note that the missing marker is a string while the other two
        results are ints.
        """
        if val == BINARY_MISSING_VAL:
            return '.'
        return 1 if val == True else 0

    @classmethod
    def getDefaultFileNameSuffix(cls):
        """Return the first entry of cls.FILE_SUFFIXES."""
        return cls.FILE_SUFFIXES[0]
 def _commonTestComposer(self, withTrackGESource, composerCls, suffix):
     """Compose each selected test case and compare the re-parsed output.

     withTrackGESource -- when true, self.GENOME is used as genome, the
         case is preprocessed under self.TRACK_NAME_PREFIX and the
         composer reads from a TrackGenomeElementSource; otherwise the
         composer reads the raw case source directly.
     composerCls -- composer class; instantiated with the input source and
         asked for returnComposed().
     suffix -- selects the cases to run (the case name must equal suffix
         or start with suffix followed by '_' or '.'); also used as the
         suffix of the temporary composed file.

     The composed contents are written to a temporary file, parsed again
     with the case's source class, and the elements and bounding regions
     are compared with those of the original case. The temporary file is
     closed, and any preprocessed track data removed, even when composing
     or an assertion fails.
     """
     geSourceTest = self._commonSetup()

     for caseName in geSourceTest.cases:
         if not (caseName == suffix or \
                 (caseName.startswith(suffix) and caseName[len(suffix)] in ['_','.'])):
             continue

         # `and` binds tighter than `or`: 'no_print' cases are always
         # skipped, the other two markers only when withTrackGESource.
         if 'no_print' in caseName or \
             withTrackGESource and ('no_track_extract' in caseName or \
                                    caseName.endswith('_no_hb')):
             print('Test case skipped: ' + caseName)
             continue

         print(caseName)
         case = geSourceTest.cases[caseName]

         testFn = self._writeTestFile(case)
         sourceClass = case.sourceClass if case.sourceClass is not None else GenomeElementSource
         genome = self.GENOME if withTrackGESource else case.genome

         rawCaseGESource = sourceClass(testFn, genome, printWarnings=False)
         caseGESource = GEDependentAttributesHolder(rawCaseGESource)

         if withTrackGESource:
             # Run through the whole source before asking for bounding
             # regions (presumably required by the holder -- confirm).
             for x in caseGESource:
                 pass

             boundingRegionTuples = caseGESource.getBoundingRegionTuples()
             boundingRegions = [br.region for br in boundingRegionTuples]
             if boundingRegions == [] or all(br.chr is None for br in boundingRegions):
                 boundingRegions = GlobalBinSource(self.GENOME)

             trackName = self.TRACK_NAME_PREFIX + case.trackName
             self._preProcess(trackName)

             allowOverlaps = ('start' in case.prefixList) and \
                 not caseName.endswith('_compose_no_overlaps')
             inputGESource = TrackGenomeElementSource(self.GENOME, trackName, boundingRegions, \
                                                      printWarnings=False, allowOverlaps=allowOverlaps)
         else:
             inputGESource = rawCaseGESource

         try:
             composer = composerCls(inputGESource)
             contents = composer.returnComposed()
             print(contents)

             # The temporary file is removed when closed, so everything
             # that reads it by name has to happen inside the with-block.
             with NamedTemporaryFile('w', suffix='.' + suffix) as composedFile:
                 composedFile.write(contents)
                 composedFile.flush()

                 outputGESource = GEDependentAttributesHolder(
                     sourceClass(composedFile.name, genome, printWarnings=False))

                 if 'no_check_print' in caseName or withTrackGESource and 'no_check_track_extract' in caseName:
                     print('No checks for case: ' + caseName)
                 else:
                     caseGEs = [ge.getCopy() for ge in caseGESource]
                     outputGEs = [ge.getCopy() for ge in outputGESource]
                     # Elements can be sorted only if the first one has a
                     # start or an end.
                     isSortableGE = len(caseGEs) > 0 and \
                         any(getattr(caseGEs[0], x) is not None for x in ['start','end'])
                     if withTrackGESource and isSortableGE and not caseGESource.hasBoundingRegionTuples():
                         caseGEs = sorted(caseGEs)
                     self.assertGenomeElementLists(caseGEs, outputGEs)
                     self.assertListsOrDicts(caseGESource.getBoundingRegionTuples(),
                                             outputGESource.getBoundingRegionTuples())
         finally:
             # Remove the preprocessed track data even when composing or
             # an assertion failed, so it cannot leak into later cases.
             if withTrackGESource:
                 self._removeAllTrackData(self.TRACK_NAME_PREFIX)
    def testHeaderExpansion(self):
        """Strip expandable headers from each gtrack case and re-expand them.

        Cases whose name does not start with 'gtrack', or contains
        'no_expand', are skipped. For each remaining case the header lines
        are normalised, the expandable ones are removed, and the file is
        expanded again twice (with onlyNonDefault False and True). Unless
        the case name contains 'no_check_expand', the expanded header
        values are compared with the original ones, and both expanded
        versions must parse to the expected elements and bounding regions.
        """
        geSourceTest = self._commonSetup()

        for caseName in geSourceTest.cases:
            if not caseName.startswith('gtrack'):
                continue

            if 'no_expand' in caseName:
                print('Test case skipped: ' + caseName)
                continue

            onlyGuaranteed = 'no_types_expanded' in caseName

            print(caseName)
            print('===========')
            case = geSourceTest.cases[caseName]

            # Header lines are rewritten as lower-cased '##key: value';
            # every other line is passed through unchanged.
            headerLines = []
            for rawLine in case.headerLines:
                if self._isHeaderLine(rawLine):
                    keyAndValue = Gtrack.getHeaderKeyValue(rawLine.strip())
                    rawLine = '##' + ': '.join([str(part).lower() for part in keyAndValue])
                headerLines.append(rawLine)

            fullContents = os.linesep.join(headerLines + case.lines)
            print('Original:\n\n' + fullContents)

            # The case object itself is modified: the test file written
            # below lacks the expandable headers.
            case.headerLines = [hdr for hdr in headerLines
                                if not self._isExpandableHeader(hdr, onlyGuaranteed)]
            print('-----')
            print('With headers removed:\n\n' + os.linesep.join(case.headerLines + case.lines))

            testFn = self._writeTestFile(case)

            expandedContents = expandHeadersOfGtrackFileAndReturnContents(
                testFn, case.genome, onlyNonDefault=False)

            print('-----')
            print('With expanded headers:\n\n' + expandedContents)

            expandedContentsOnlyNonDefaults = expandHeadersOfGtrackFileAndReturnContents(
                testFn, case.genome, onlyNonDefault=True)

            print('-----')
            print('With expanded headers (only non-default headers):\n\n' + expandedContentsOnlyNonDefaults)

            origExpandableHeaders = dict(
                Gtrack.getHeaderKeyValue(hdr) for hdr in headerLines
                if self._isExpandableHeader(hdr, onlyGuaranteed=False))
            notExpandableHeaders = dict(
                Gtrack.getHeaderKeyValue(hdr) for hdr in case.headerLines
                if self._isHeaderLine(hdr) and not self._isValueNotKeptHeader(hdr))
            expandedHeaders = dict(
                Gtrack.getHeaderKeyValue(hdr) for hdr in expandedContents.split(os.linesep)
                if self._isHeaderLine(hdr))

            if 'no_check_expand' in caseName:
                print('No checks for case: ' + caseName)
                continue

            # Removed and kept headers alike must reappear with their
            # original value in the fully expanded output.
            for expectedHeaders in (origExpandableHeaders, notExpandableHeaders):
                for header in expectedHeaders:
                    self.assertEquals(expectedHeaders[header], expandedHeaders[header])

            # Cases without an explicit source class use the generic
            # GenomeElementSource, constructed with forPreProcessor=True.
            usesDefaultSource = case.sourceClass is None
            sourceClass = GenomeElementSource if usesDefaultSource else case.sourceClass

            for contents in [expandedContents, expandedContentsOnlyNonDefaults]:
                stdGeSource = GEDependentAttributesHolder(
                    sourceClass('expanded.gtrack', case.genome,
                                forPreProcessor=usesDefaultSource,
                                printWarnings=False,
                                strToUseInsteadOfFn=contents))

                self.assertEquals(case.assertElementList, [ge for ge in stdGeSource])
                self.assertEquals(case.boundingRegionsAssertList,
                                  [br for br in stdGeSource.getBoundingRegionTuples()])
 def __init__(self, geSource, genome=None):
     """Wrap geSource in a GEDependentAttributesHolder and set up both bases.

     The holder is handed to GESourceWrapper.__init__;
     GenomeElementSource.__init__ is then called with an empty file name
     and the given genome.
     """
     # Imported inside the method, exactly as the original code does.
     from gold.origdata.GEDependentAttributesHolder import GEDependentAttributesHolder

     attrHolder = GEDependentAttributesHolder(geSource)
     GESourceWrapper.__init__(self, attrHolder)
     GenomeElementSource.__init__(self, '', genome=genome)