def test_common_file_helpers(self):
    '''
    Exercise the s_common file helpers against scratch directories.

    Covers genfile, reqpath, reqfile, getfile, getbytes, reqbytes and listdir.
    Every section works in its own directory obtained from self.getTestDir().
    '''
    # genfile: write through the returned handle, then read the nested path back
    with self.getTestDir() as testdir:
        # Use the handle as a context manager so it is closed even if the write raises
        with s_common.genfile(testdir, 'woot', 'foo.bin') as fd:
            fd.write(b'genfile_test')

        with open(os.path.join(testdir, 'woot', 'foo.bin'), 'rb') as fd:
            buf = fd.read()
        self.eq(buf, b'genfile_test')

    # reqpath: returns the joined path for an existing file, raises for a missing one
    with self.getTestDir() as testdir:
        with s_common.genfile(testdir, 'test.txt') as fd:
            fd.write(b'')
        self.eq(os.path.join(testdir, 'test.txt'), s_common.reqpath(testdir, 'test.txt'))
        self.raises(s_exc.NoSuchFile, s_common.reqpath, testdir, 'newp')

    # reqfile: returns a readable handle, raises for a missing file
    with self.getTestDir() as testdir:
        with s_common.genfile(testdir, 'test.txt') as fd:
            fd.write(b'reqfile_test')

        fd = s_common.reqfile(testdir, 'test.txt')
        try:
            buf = fd.read()
        finally:
            # Close before asserting so a failed comparison cannot leak the handle
            fd.close()
        self.eq(buf, b'reqfile_test')
        self.raises(s_exc.NoSuchFile, s_common.reqfile, testdir, 'newp')

    # getfile: returns a readable handle, or None for a missing file
    with self.getTestDir() as testdir:
        with s_common.genfile(testdir, 'test.txt') as fd:
            fd.write(b'getfile_test')

        fd = s_common.getfile(testdir, 'test.txt')
        try:
            buf = fd.read()
        finally:
            # Close before asserting so a failed comparison cannot leak the handle
            fd.close()
        self.eq(buf, b'getfile_test')
        self.none(s_common.getfile(testdir, 'newp'))

    # getbytes: returns the file contents, or None for a missing file
    with self.getTestDir() as testdir:
        with s_common.genfile(testdir, 'test.txt') as fd:
            fd.write(b'getbytes_test')
        buf = s_common.getbytes(testdir, 'test.txt')
        self.eq(buf, b'getbytes_test')
        self.none(s_common.getbytes(testdir, 'newp'))

    # reqbytes: returns the file contents, raises for a missing file
    with self.getTestDir() as testdir:
        with s_common.genfile(testdir, 'test.txt') as fd:
            fd.write(b'reqbytes_test')
        buf = s_common.reqbytes(testdir, 'test.txt')
        self.eq(buf, b'reqbytes_test')
        self.raises(s_exc.NoSuchFile, s_common.reqbytes, testdir, 'newp')

    # listdir: one file and one sub-directory at the top level; the glob only
    # matches the top-level .txt file, not the one inside 'nest'
    with self.getTestDir() as dirn:
        path = os.path.join(dirn, 'woot.txt')
        with open(path, 'wb') as fd:
            fd.write(b'woot')

        os.makedirs(os.path.join(dirn, 'nest'))
        with open(os.path.join(dirn, 'nest', 'nope.txt'), 'wb') as fd:
            fd.write(b'nope')

        retn = tuple(s_common.listdir(dirn))
        self.len(2, retn)

        retn = tuple(s_common.listdir(dirn, glob='*.txt'))
        self.eq(retn, (path,))
def test_common_file_helpers(self):
    '''
    Exercise the s_common file helpers against scratch directories.

    Covers genfile, reqpath, reqfile, getfile, getbytes, reqbytes, listdir and
    getDirSize. The getDirSize results are compared with the output of the
    external `du` command, so this test needs a `du` that accepts -B and -b.
    '''
    # genfile: write through the returned handle, then read the nested path back
    with self.getTestDir() as testdir:
        # Use the handle as a context manager so it is closed even if the write raises
        with s_common.genfile(testdir, 'woot', 'foo.bin') as fd:
            fd.write(b'genfile_test')

        with open(os.path.join(testdir, 'woot', 'foo.bin'), 'rb') as fd:
            buf = fd.read()
        self.eq(buf, b'genfile_test')

    # reqpath: returns the joined path for an existing file, raises for a missing one
    with self.getTestDir() as testdir:
        with s_common.genfile(testdir, 'test.txt') as fd:
            fd.write(b'')
        self.eq(os.path.join(testdir, 'test.txt'), s_common.reqpath(testdir, 'test.txt'))
        self.raises(s_exc.NoSuchFile, s_common.reqpath, testdir, 'newp')

    # reqfile: returns a readable handle, raises for a missing file
    with self.getTestDir() as testdir:
        with s_common.genfile(testdir, 'test.txt') as fd:
            fd.write(b'reqfile_test')

        fd = s_common.reqfile(testdir, 'test.txt')
        try:
            buf = fd.read()
        finally:
            # Close before asserting so a failed comparison cannot leak the handle
            fd.close()
        self.eq(buf, b'reqfile_test')
        self.raises(s_exc.NoSuchFile, s_common.reqfile, testdir, 'newp')

    # getfile: returns a readable handle, or None for a missing file
    with self.getTestDir() as testdir:
        with s_common.genfile(testdir, 'test.txt') as fd:
            fd.write(b'getfile_test')

        fd = s_common.getfile(testdir, 'test.txt')
        try:
            buf = fd.read()
        finally:
            # Close before asserting so a failed comparison cannot leak the handle
            fd.close()
        self.eq(buf, b'getfile_test')
        self.none(s_common.getfile(testdir, 'newp'))

    # getbytes: returns the file contents, or None for a missing file
    with self.getTestDir() as testdir:
        with s_common.genfile(testdir, 'test.txt') as fd:
            fd.write(b'getbytes_test')
        buf = s_common.getbytes(testdir, 'test.txt')
        self.eq(buf, b'getbytes_test')
        self.none(s_common.getbytes(testdir, 'newp'))

    # reqbytes: returns the file contents, raises for a missing file
    with self.getTestDir() as testdir:
        with s_common.genfile(testdir, 'test.txt') as fd:
            fd.write(b'reqbytes_test')
        buf = s_common.reqbytes(testdir, 'test.txt')
        self.eq(buf, b'reqbytes_test')
        self.raises(s_exc.NoSuchFile, s_common.reqbytes, testdir, 'newp')

    # listdir: one file and one sub-directory at the top level; the glob only
    # matches the top-level .txt file, not the one inside 'nest'
    with self.getTestDir() as dirn:
        path = os.path.join(dirn, 'woot.txt')
        with open(path, 'wb') as fd:
            fd.write(b'woot')

        os.makedirs(os.path.join(dirn, 'nest'))
        with open(os.path.join(dirn, 'nest', 'nope.txt'), 'wb') as fd:
            fd.write(b'nope')

        retn = tuple(s_common.listdir(dirn))
        self.len(2, retn)

        retn = tuple(s_common.listdir(dirn, glob='*.txt'))
        self.eq(retn, (path,))

        # getDirSize: check against du
        real, appr = s_common.getDirSize(dirn)

        # `du -B 1 -s` gives the summarized disk usage in 1-byte blocks
        durealstr = subprocess.check_output(['du', '-B', '1', '-s', dirn])
        dureal = int(durealstr.split()[0])

        # `du -bs` gives the summarized apparent size in bytes
        duapprstr = subprocess.check_output(['du', '-bs', dirn])
        duappr = int(duapprstr.split()[0])

        self.eq(dureal, real)
        self.eq(duappr, appr)
def test_common_file_helpers(self):
    '''
    Exercise the s_common file helpers against scratch directories.

    Covers genfile, reqpath, reqfile, getfile, getbytes, reqbytes and listdir.
    Every section works in its own directory obtained from self.getTestDir().
    '''
    # genfile: write through the returned handle, then read the nested path back
    with self.getTestDir() as testdir:
        # Use the handle as a context manager so it is closed even if the write raises
        with s_common.genfile(testdir, 'woot', 'foo.bin') as fd:
            fd.write(b'genfile_test')

        with open(os.path.join(testdir, 'woot', 'foo.bin'), 'rb') as fd:
            buf = fd.read()
        self.eq(buf, b'genfile_test')

    # reqpath: returns the joined path for an existing file, raises for a missing one
    with self.getTestDir() as testdir:
        with s_common.genfile(testdir, 'test.txt') as fd:
            fd.write(b'')
        self.eq(os.path.join(testdir, 'test.txt'), s_common.reqpath(testdir, 'test.txt'))
        self.raises(s_exc.NoSuchFile, s_common.reqpath, testdir, 'newp')

    # reqfile: returns a readable handle, raises for a missing file
    with self.getTestDir() as testdir:
        with s_common.genfile(testdir, 'test.txt') as fd:
            fd.write(b'reqfile_test')

        fd = s_common.reqfile(testdir, 'test.txt')
        try:
            buf = fd.read()
        finally:
            # Close before asserting so a failed comparison cannot leak the handle
            fd.close()
        self.eq(buf, b'reqfile_test')
        self.raises(s_exc.NoSuchFile, s_common.reqfile, testdir, 'newp')

    # getfile: returns a readable handle, or None for a missing file
    with self.getTestDir() as testdir:
        with s_common.genfile(testdir, 'test.txt') as fd:
            fd.write(b'getfile_test')

        fd = s_common.getfile(testdir, 'test.txt')
        try:
            buf = fd.read()
        finally:
            # Close before asserting so a failed comparison cannot leak the handle
            fd.close()
        self.eq(buf, b'getfile_test')
        self.none(s_common.getfile(testdir, 'newp'))

    # getbytes: returns the file contents, or None for a missing file
    with self.getTestDir() as testdir:
        with s_common.genfile(testdir, 'test.txt') as fd:
            fd.write(b'getbytes_test')
        buf = s_common.getbytes(testdir, 'test.txt')
        self.eq(buf, b'getbytes_test')
        self.none(s_common.getbytes(testdir, 'newp'))

    # reqbytes: returns the file contents, raises for a missing file
    with self.getTestDir() as testdir:
        with s_common.genfile(testdir, 'test.txt') as fd:
            fd.write(b'reqbytes_test')
        buf = s_common.reqbytes(testdir, 'test.txt')
        self.eq(buf, b'reqbytes_test')
        self.raises(s_exc.NoSuchFile, s_common.reqbytes, testdir, 'newp')

    # listdir: one file and one sub-directory at the top level; the glob only
    # matches the top-level .txt file, not the one inside 'nest'
    with self.getTestDir() as dirn:
        path = os.path.join(dirn, 'woot.txt')
        with open(path, 'wb') as fd:
            fd.write(b'woot')

        os.makedirs(os.path.join(dirn, 'nest'))
        with open(os.path.join(dirn, 'nest', 'nope.txt'), 'wb') as fd:
            fd.write(b'nope')

        retn = tuple(s_common.listdir(dirn))
        self.len(2, retn)

        retn = tuple(s_common.listdir(dirn, glob='*.txt'))
        self.eq(retn, (path,))
async def test_multislabseqn_cull(self):
    '''
    Check MultiSlabSeqn.cull() with a single slab, after a rotation, while
    iterators are open, and again after the sequence is reopened from disk.
    '''
    with self.getTestDir() as dirn:

        def slabpaths():
            # Sorted *.lmdb entries currently present in the test directory
            return sorted(s_common.listdir(dirn, glob='*.lmdb'))

        async with await s_multislabseqn.MultiSlabSeqn.anit(dirn) as msqn:

            for i in range(10):
                await msqn.add(f'foo{i}')

            # cull with only one seqn
            self.true(await msqn.cull(2))
            await self.asyncraises(s_exc.BadIndxValu, msqn.get(2))

            await msqn.rotate()

            for i in range(10, 15):
                await msqn.add(f'foo{i}')

            # A no-op
            self.false(await msqn.cull(99))

            # Ensure there's a cached slab
            self.eq('foo4', await msqn.get(4))

            self.true(await msqn.cull(4))
            await self.asyncraises(s_exc.BadIndxValu, msqn.get(4))
            await self.asyncraises(s_exc.BadIndxValu, msqn.add('foo12', indx=4))

            self.eq('foo5', await msqn.get(5))

            # While an iterator started at 0 is open, cull(6) is expected to raise SlabInUse
            genr = msqn.iter(0)
            await genr.__anext__()

            await self.asyncraises(s_exc.SlabInUse, msqn.cull(6))
            await genr.aclose()

            # culling on the tail just moves firstindx forward
            genr = msqn.iter(10)
            await genr.__anext__()
            await msqn.cull(11)
            self.eq(12, msqn.firstindx)
            await genr.aclose()

            items = await alist(msqn.iter(1))
            self.eq([(12, 'foo12'), (13, 'foo13'), (14, 'foo14')], items)

            await self.asyncraises(s_exc.BadIndxValu, msqn.get(11))
            await self.asyncraises(s_exc.BadIndxValu, msqn.add('foo', indx=11))

            # Make sure it deleted a slab
            self.len(1, slabpaths())

            # Make sure ranges are updated
            self.eq([10], msqn._ranges)

            # Can't cull before firstidx
            self.false(await msqn.cull(10))

        # Reopen the same directory and cull once more
        async with await s_multislabseqn.MultiSlabSeqn.anit(dirn) as msqn:

            await msqn.cull(13)
            items = await alist(msqn.iter(1))
            self.eq([(14, 'foo14')], items)
async def test_multislabseqn(self):
    '''
    Walk a MultiSlabSeqn through adds, rotations, culls, a close/reopen cycle,
    waiting readers, non-tail writes, and checks on how many slabs stay open.
    '''
    with self.getTestDir() as dirn:

        def slabpaths():
            # Sorted *.lmdb entries currently present in the test directory
            return sorted(s_common.listdir(dirn, glob='*.lmdb'))

        async with await s_multislabseqn.MultiSlabSeqn.anit(dirn) as msqn:

            s_multislabseqn.logger.debug(f'Repr test {msqn}')

            # A brand new sequence is empty
            self.eq(0, msqn.index())
            self.eq([], await alist(msqn.iter(0)))
            self.eq(None, await msqn.last())
            self.eq(0, await msqn.rotate())
            self.false(await msqn.cull(0))

            # Plain adds hand back consecutive indices
            self.eq(0, await msqn.add('foo'))
            self.eq(1, await msqn.add('foo2'))

            self.eq([(0, 'foo'), (1, 'foo2')], await alist(msqn.iter(0)))
            self.eq([(1, 'foo2')], await alist(msqn.iter(1)))

            # An explicit indx beyond the tail leaves a gap
            self.eq(9, await msqn.add('foo9', indx=9))
            self.eq([(0, 'foo'), (1, 'foo2'), (9, 'foo9')], await alist(msqn.iter(0)))

            # Offset 9 is already written; offset 10 is not, so the wait times out
            evnt1 = msqn.getOffsetEvent(9)
            self.true(evnt1.is_set())
            self.true(await msqn.waitForOffset(9, timeout=0.5))

            evnt1 = msqn.getOffsetEvent(10)
            self.false(await msqn.waitForOffset(10, timeout=0.1))

            self.eq(10, await msqn.add('foo10'))

            items = await alist(msqn.iter(0))
            self.eq([(0, 'foo'), (1, 'foo2'), (9, 'foo9'), (10, 'foo10')], items)

            self.eq((10, 'foo10'), await msqn.last())
            self.eq(11, msqn.index())
            self.eq(4, msqn.tailseqn.size)

            self.len(1, slabpaths())

            # cause a rotation
            self.eq(11, await msqn.rotate())
            self.eq(11, msqn.index())
            self.eq((10, 'foo10'), await msqn.last())

            items = await alist(msqn.iter(0))
            self.eq([(0, 'foo'), (1, 'foo2'), (9, 'foo9'), (10, 'foo10')], items)
            self.len(0, list(msqn.tailseqn.iter(0)))

            self.len(2, slabpaths())

            # Need one entry so can't cull at >= 10
            self.false(await msqn.cull(10))
            self.false(await msqn.cull(11))

            self.true(await msqn.cull(9))

            self.len(2, slabpaths())

            self.eq([(10, 'foo10')], await alist(msqn.iter(0)))

            # Once we write into tailseqn we can actually remove the rotated seqn
            await msqn.add('foo11')
            self.eq([(10, 'foo10'), (11, 'foo11')], await alist(msqn.iter(0)))

            self.true(await msqn.cull(10))

            self.len(1, slabpaths())

            self.eq([(11, 'foo11')], await alist(msqn.iter(0)))
            self.eq((11, 'foo11'), await msqn.last())

            # Add some values and rotate for persistence check
            await msqn.add('foo12')
            await msqn.rotate()
            await msqn.add('foo13')
            await msqn.add('foo14')

            exp = [(11, 'foo11'), (12, 'foo12'), (13, 'foo13'), (14, 'foo14')]
            self.eq(exp, await alist(msqn.iter(0)))

        # Persistence check
        async with await s_multislabseqn.MultiSlabSeqn.anit(dirn) as msqn:

            self.eq(exp, await alist(msqn.iter(0)))
            self.eq((14, 'foo14'), await msqn.last())

            self.eq('foo11', await msqn.get(11))
            self.eq('foo12', await msqn.get(12))
            self.eq('foo13', await msqn.get(13))
            self.eq('foo14', await msqn.get(14))

            self.eq(exp, await alist(msqn.gets(9, wait=False)))

            evnt = asyncio.Event()

            async def getter():
                # Collect items from gets(9) until the 'done' marker shows up
                seen = []
                async for item in msqn.gets(9):
                    evnt.set()
                    if item[1] == 'done':
                        break
                    seen.append(item)
                return seen

            task = msqn.schedCoro(getter())
            await s_coro.event_wait(evnt, timeout=1)
            await msqn.add('done')

            self.eq(exp, await asyncio.wait_for(task, timeout=1))

            # Add entries not on the tail
            self.eq(11, await msqn.add('foo11b', indx=11))
            self.eq(13, await msqn.add('foo13b', indx=13))

            await self.asyncraises(s_exc.BadIndxValu, msqn.add('foo7', indx=7))

            items = await alist(msqn.iter(1))
            exp = [(11, 'foo11b'), (12, 'foo12'), (13, 'foo13b'), (14, 'foo14'), (15, 'done')]
            self.eq(exp, items)

            # Give a chance for the non-iterated async generators to get cleaned up
            await asyncio.sleep(0)
            await asyncio.sleep(0)

            # Make sure we're not holding onto more than 2 slabs

            # rotate
            await msqn.add('foo16')
            await msqn.rotate()
            await msqn.add('foo17')
            await msqn.add('foo18')

            self.len(3, slabpaths())
            self.len(2, msqn._openslabs)

            # first
            self.eq('foo11b', await msqn.get(11))
            self.len(2, msqn._openslabs)

            # middle
            self.eq('foo14', await msqn.get(14))
            self.len(2, msqn._openslabs)

            # tail
            self.eq('foo17', await msqn.get(17))
            self.len(2, msqn._openslabs)

            # Make sure we don't open the same slab twice

            # Keep a ref to the first slab
            genr = msqn.iter(0)
            self.eq(await genr.__anext__(), (11, 'foo11b'))

            self.len(2, msqn._openslabs)

            # (Need to evict first slab ref from the cacheslab)
            self.true(msqn._cacheslab.path.endswith('b.lmdb'))
            self.eq(await msqn.get(14), 'foo14')
            self.true(msqn._cacheslab.path.endswith('d.lmdb'))

            # Should have the tail slab, the cache slab, and the open iterator slabs
            self.len(3, msqn._openslabs)

            self.eq(await msqn.get(12), 'foo12')

            self.len(7, await alist(genr))

            # Iterator exhausted: should have just the cache slab (10) and the tail slab (20)
            self.len(2, msqn._openslabs)
async def _discoverRanges(self):
    '''
    Scan the slab directories under self.dirn and rebuild the index bookkeeping.

    Resets and repopulates self._ranges, self.firstindx and self.indx, then passes
    the starting index of the last slab directory processed (0 if none was) to
    self._initTailSlab().

    Raises:
        s_exc.BadCoreStore: If two slab files overlap, if a slab's first stored index
            is lower than the index encoded in its filename, or if the resulting
            firstindx is greater than indx.
    '''
    self._ranges: List[int] = []  # Starting offsets of all the slabs in order
    self.firstindx = 0  # persistently-stored indicator of lowest index
    self.indx = 0  # The next place an add() will go

    fnstartidx = 0
    lastidx = None
    lowindx = None

    slabglob = '*seqn' + '[abcdef01234567890]' * 16 + '.lmdb'

    # Make sure the files are in order
    for fn in sorted(s_common.listdir(self.dirn, glob=slabglob)):

        if not os.path.isdir(fn):
            logger.warning(f'Found a non-directory {fn} where a directory should be')
            continue

        found = seqnslabre.match(os.path.basename(fn))
        assert found

        # The filename carries the slab's starting index as hex
        startidx = int(found.group(1), 16)
        assert startidx >= fnstartidx
        fnstartidx = startidx

        # Remember the starting index of the first slab directory seen
        if lowindx is None:
            lowindx = fnstartidx

        if lastidx is not None and fnstartidx <= lastidx:
            mesg = f'Multislab: overlapping files ({fn}). Previous last index is {lastidx}.'
            raise s_exc.BadCoreStore(mesg=mesg)

        if lastidx is not None and fnstartidx != lastidx + 1:
            logger.debug(f'Multislab: gap in indices at {fn}. Previous last index is {lastidx}.')

        async with await s_lmdbslab.Slab.anit(fn, **self.slabopts) as slab:
            self.firstindx = self._getFirstIndx(slab)

            # We use the old name of the sequence to ease migration from the old system
            seqn = slab.getSeqn('nexuslog')

            head = seqn.first()
            if head is not None:
                self.indx = seqn.indx

                # head[0] might not match the separately stored first index due to culling
                if head[0] < fnstartidx:
                    raise s_exc.BadCoreStore('Multislab: filename inconsistent with contents')

                lastidx = seqn.index() - 1
            else:
                # Empty slab: the next add goes at the index named by the file
                self.indx = fnstartidx

        self._ranges.append(fnstartidx)

    # An admin might have manually culled by rm'ing old slabs. Update firstidx accordingly.
    if lowindx is not None and lowindx > self.firstindx:
        self.firstindx = lowindx

    if self.firstindx > self.indx:
        raise s_exc.BadCoreStore('Invalid firstindx value')

    await self._initTailSlab(fnstartidx)