def test_nlmsaslice_cache(self):
    """NLMSASlice sequence caching & removal

    Aligns two sequences from the 'dnaseq.fasta' test file in an
    in-memory pairwise NLMSA and checks, in order, that:

    * db has no _cache attribute before any slice is taken,
    * taking an NLMSASlice leaves exactly one entry in db._cache,
    * deleting that slice empties db._cache again,
    * dropping every remaining local reference empties
      db._weakValueDict.

    The database is always closed, even when an assertion fails.
    """
    fasta_path = testutil.datafile('dnaseq.fasta')
    # autoGC=-1: use pure WeakValueDict...
    db = SequenceFileDB(fasta_path, autoGC=-1)
    try:
        # Nothing has been fetched yet, so nothing should be tracked.
        gc.collect()
        weak_total = len(db._weakValueDict)
        assert weak_total == 0, '_weakValueDict should be empty'

        first = db['seq1']
        second = db['seq2']
        weak_total = len(db._weakValueDict)
        assert weak_total == 2, '_weakValueDict should have 2 seqs'

        # Build an alignment that references both sequences.
        alignment = NLMSA('test', 'memory', db, pairwiseMode=True)
        alignment += first
        alignment[first] += second
        alignment.build()

        # Building alone must not have created the cache.
        assert not hasattr(db, '_cache'), 'should be no cache yet'

        # Re-retrieve the sequences, then take an NLMSASlice, forcing
        # entry of seq into cache.
        first, second = db['seq1'], db['seq2']
        region = first[5:10]
        aligned_slice = alignment[region]
        value_total = len(db._cache.values())
        assert value_total != 0
        entries_with_slice = len(db._cache)
        assert entries_with_slice == 1, \
            "should be exactly one cache entry, not %d" % (
                entries_with_slice, )

        # Only lengths are kept above (never the cached values), so
        # deleting the slice removes the last reference from this test.
        del aligned_slice
        gc.collect()
        value_total = len(db._cache.values())
        assert value_total == 0
        entries_after_del = len(db._cache)
        assert entries_after_del == 0, \
            '%d objects remain; cache memory leak!' % entries_after_del
        # Original note: FAIL because of __dealloc__ error in
        # cnestedlist.NLMSASlice.

        # Drop our references, the cache should empty.
        del alignment, region, first, second
        gc.collect()

        # Check that the db._weakValueDict cache is empty.
        weak_total = len(db._weakValueDict)
        assert weak_total == 0, '_weakValueDict should be empty'
    finally:
        db.close()
def test_nlmsaslice_cache(self):
    """NLMSASlice sequence caching & removal

    Exercises the sequence database's caches around an NLMSA slice:
    the slice must add exactly one entry to db._cache, deleting the
    slice must remove it, and once all local references are gone
    db._weakValueDict must be empty as well.  db is closed in a
    finally clause regardless of the outcome.
    """
    # Set up sequences (autoGC=-1: use pure WeakValueDict...).
    db = SequenceFileDB(testutil.datafile('dnaseq.fasta'), autoGC=-1)
    try:
        gc.collect()
        tracked = len(db._weakValueDict)
        assert tracked == 0, '_weakValueDict should be empty'

        seq_a, seq_b = db['seq1'], db['seq2']
        tracked = len(db._weakValueDict)
        assert tracked == 2, '_weakValueDict should have 2 seqs'

        # Build the referencing NLMSA.
        msa = NLMSA('test', 'memory', db, pairwiseMode=True)
        msa += seq_a
        msa[seq_a] += seq_b
        msa.build()

        # Check: no cache attribute has appeared so far.
        cache_exists = hasattr(db, '_cache')
        assert not cache_exists, 'should be no cache yet'

        # Re-retrieve, then slice the alignment, forcing entry of seq
        # into cache.
        seq_a, seq_b = db['seq1'], db['seq2']
        interval = seq_a[5:10]
        nlmsa_slice = msa[interval]
        assert len(db._cache.values()) != 0
        cached = len(db._cache)
        assert cached == 1, \
            "should be exactly one cache entry, not %d" % (cached, )

        # Ok, now trash the slice and make sure of cleanup.
        del nlmsa_slice
        gc.collect()
        assert len(db._cache.values()) == 0
        leftover = len(db._cache)
        # Original note: FAIL because of __dealloc__ error in
        # cnestedlist.NLMSASlice.
        assert leftover == 0, \
            '%d objects remain; cache memory leak!' % leftover

        # Drop our references, the cache should empty.
        del msa, interval, seq_a, seq_b
        gc.collect()

        # Check that the db._weakValueDict cache is empty.
        tracked = len(db._weakValueDict)
        assert tracked == 0, '_weakValueDict should be empty'
    finally:
        db.close()
def nlmsaslice_cache_test(self):
    """Test NLMSASlice sequence caching & removal

    Builds an in-memory pairwise NLMSA over 'seq1' and 'seq2' from the
    'dnaseq' sequence database, then verifies that taking an NLMSASlice
    adds exactly one entry to db._cache, that deleting the slice empties
    the cache, and that db._weakValueDict is empty once all local
    references are dropped.

    The cache contents are asserted rather than printed, so a regression
    fails the test instead of relying on someone reading the output, and
    the database is closed even when an assertion fails.
    """
    # set up sequences
    db = SequenceFileDB('dnaseq', autoGC=-1)  # use pure WeakValueDict...
    try:
        gc.collect()
        assert len(db._weakValueDict) == 0, '_weakValueDict should be empty'
        seq1, seq2 = db['seq1'], db['seq2']
        assert len(db._weakValueDict) == 2, \
            '_weakValueDict should have 2 seqs'

        # build referencing NLMSA
        mymap = NLMSA('test', 'memory', db, pairwiseMode=True)
        mymap += seq1
        mymap[seq1] += seq2
        mymap.build()

        # check: no cache
        assert not hasattr(db, '_cache'), 'should be no cache yet'

        seq1, seq2 = db['seq1'], db['seq2']  # re-retrieve
        # now retrieve a NLMSASlice, forcing entry of seq into cache
        ival = seq1[5:10]
        x = mymap[ival]

        # Only lengths are taken here: holding on to the values themselves
        # would keep the cached objects alive and defeat the checks below.
        assert len(db._cache.values()) != 0, 'cache should not be empty'
        n1 = len(db._cache)
        assert n1 == 1, "should be exactly one cache entry, not %d" % (n1,)

        # ok, now trash referencing arguments & make sure of cleanup
        del x
        gc.collect()

        assert len(db._cache.values()) == 0, 'cache should be empty'
        n2 = len(db._cache)
        assert n2 == 0, '%d objects remain; cache memory leak!' % n2
        # FAIL because of __dealloc__ error in cnestedlist.NLMSASlice.

        del mymap, ival, seq1, seq2  # drop our references, cache should empty
        gc.collect()

        # check that db._weakValueDict cache is empty
        assert len(db._weakValueDict) == 0, '_weakValueDict should be empty'
    finally:
        # Release the database's files whether or not the checks passed.
        db.close()
#!/usr/bin/env python
# Build the 'exonAnnot' NLMSA annotation mapping.
#
# Reads whitespace-separated exon rows from exonslice.txt, stores each row in
# the on-disk exon_slices collection keyed by its integer exon id (second
# field), and adds the matching AnnotationDB object to the NLMSA before
# building the alignment indexes.
import pygr.Data
from pygr.cnestedlist import NLMSA
from pygr.seqdb import BlastDB, AnnotationDB

seqdb = BlastDB('/somepath/hg18.fa')
# Store on disk
exon_slices = pygr.Data.Collection(path='exon_slices.db', mode='c',
                                   writeback=False)
# NOTE(review): the sliceAttrDict values look like column indexes into each
# stored row -- confirm against the AnnotationDB documentation.
exon_db = AnnotationDB(exon_slices, seqdb,
                       sliceAttrDict=dict(id=0, exon_id=1, orientation=2,
                                          gene_id=3, start=4, stop=5))
nlmsa = NLMSA('exonAnnot', 'w', use_virtual_lpo=True, bidirectional=False)

# open() instead of the Python 2-only file() builtin; try/finally so the
# input file is closed even if a malformed row raises part-way through.
ifile = open('exonslice.txt')
try:
    for line in ifile:
        row = line.split()  # split() already returns a fresh, mutable list
        if not row:
            continue  # skip blank lines instead of failing on row[1]
        row[1] = int(row[1])  # CONVERT FROM STRING TO INTEGER
        exon_slices[row[1]] = row
        exon = exon_db[row[1]]  # GET THE ANNOTATION OBJECT FOR THIS EXON
        nlmsa.addAnnotation(exon)  # SAVE IT TO GENOME MAPPING
finally:
    ifile.close()

# NOTE(review): the original indentation was lost; clear() is assumed to run
# once after the loop rather than per row -- confirm.
exon_db.clear()
nlmsa.build()  # FINALIZE GENOME ALIGNMENT INDEXES