def copydb(self, sourcedbname, destslab, destdbname=None, progresscb=None):
    '''
    Copy every row of a database in this slab into an empty database, potentially in another slab.

    Args:
        sourcedbname (str): name of the db in this slab to read rows from
        destslab (LmdbSlab): the slab that receives the rows
        destdbname (str): the name of the database in destslab to write rows to
        progresscb (Callable[int]): if not None, called with the running row count each time that count
            is an exact multiple of PROGRESS_PERIOD

    Returns:
        (int): the number of rows copied

    Raises:
        s_exc.DataAlreadyExists: if the destination database already contains entries
        s_exc.BadCoreStore: if a chunk was not written in full

    Note:
        Because the destination must be empty, one cannot use destdbname=None unless there are no
        explicit databases in the destination slab.
    '''
    # Only the dupsort flag of the source is needed; the destination db is created with the same flag.
    _, dupsort = self.dbnames[sourcedbname]

    destslab.initdb(destdbname, dupsort)

    # The handle is not used below; the lookup is kept so a missing entry still fails right here.
    _, _ = destslab.dbnames[destdbname]

    # Refuse to copy into a database that already has content.
    if destslab.stat(db=destdbname)['entries'] > 0:
        raise s_exc.DataAlreadyExists()

    rowcount = 0
    rowchunks = s_common.chunks(self.scanByFull(db=sourcedbname), COPY_CHUNKSIZE)

    for rows in rowchunks:
        nrows = len(rows)

        consumed, added = destslab.putmulti(rows, dupdata=True, append=True, db=destdbname)

        # Both counters must equal the chunk size, otherwise rows were skipped or rejected.
        if (consumed, added) != (nrows, nrows):
            raise s_exc.BadCoreStore(mesg='Unexpected number of values written')  # pragma: no cover

        rowcount += nrows

        if progresscb is None:
            continue

        if rowcount % PROGRESS_PERIOD == 0:
            progresscb(rowcount)

    return rowcount
async def _migrateV1toV2(self, nexspath, logpath):
    '''
    Migrate a v1 nexus log to the v2 multislab layout.

    The current slab is closed and its directory is moved to the multislab filename derived from the
    first index in the sequence.  A fresh slab is then opened at the original location, the nexs:indx
    database is copied from the moved slab into the fresh one, and nexs:indx is dropped from the moved slab.

    Args:
        nexspath (str): directory of the existing nexus log slab
        logpath (str): handed to MultiSlabSeqn.slabFilename() to compute where the slab is moved to

    Raises:
        s_exc.BadCoreStore: if nexspath is a mount point, or if moving the slab directory fails
    '''
    logger.warning(f'Migrating Nexus log v1->v2 for {nexspath}')

    if os.path.ismount(nexspath):  # pragma: no cover
        # Fail fast, before anything is closed or moved, if the nexspath is its own mountpoint.
        errmesg = f'The nexpath={nexspath} is located at its own mount point. This configuration cannot be migrated.'
        raise s_exc.BadCoreStore(mesg=errmesg, nexspath=nexspath)

    # Imported here rather than at module level to avoid an import cycle
    import synapse.lib.lmdbslab as s_lmdbslab
    import synapse.lib.multislabseqn as s_multislabseqn

    # The first entry of the old sequence supplies the index used to name the migrated slab
    firstitem = self.nexsslab.getSeqn('nexuslog').first()

    if firstitem is None:
        # Nothing in the sequence. Drop the empty db, mark the version and move along.
        self.nexsslab.dropdb('nexuslog')
        self.nexshot.set('version', 2)
        logger.warning('Nothing in the nexuslog sequence to migrate.')
        logger.warning('...Nexus log migration complete')
        return

    # The slab has to be closed before its directory is moved
    await self.nexsslab.fini()

    fn = s_multislabseqn.MultiSlabSeqn.slabFilename(logpath, firstitem[0])
    logger.warning(f'Existing nexslog will be migrated from {nexspath} to {fn}')

    if os.path.exists(fn):  # pragma: no cover
        logger.warning(f'Removing old migration which may have failed. This should not exist: {fn}')
        shutil.rmtree(fn)

    os.makedirs(fn, exist_ok=True)

    logger.warning(f'Moving existing nexslog')
    try:
        os.replace(nexspath, fn)
    except OSError as exc:  # pragma: no cover
        logger.exception('Error during nexslog migration.')
        raise s_exc.BadCoreStore(mesg='Error during nexslogV1toV2', nexspath=nexspath, fn=fn) from exc

    # Open a fresh slab where the old one used to be
    logger.warning(f'Re-opening fresh nexslog slab at {nexspath} for nexshot')
    self.nexsslab = await s_lmdbslab.Slab.anit(nexspath, map_async=self.map_async)
    self.nexshot = await self.nexsslab.getHotCount('nexs:indx')

    logger.warning('Copying nexs:indx data from migrated slab to the fresh nexslog')

    # There's only one value in nexs:indx, so this should be fast
    async with await s_lmdbslab.Slab.anit(fn) as movedslab:
        # Drop nexs:indx in the fresh slab first (presumably so the copy lands in an empty db)
        self.nexsslab.dropdb(self.nexsslab.initdb('nexs:indx'))

        indxdb = movedslab.initdb('nexs:indx')
        movedslab.copydb('nexs:indx', self.nexsslab, destdbname='nexs:indx')

        # The migrated slab no longer needs its copy of nexs:indx
        movedslab.dropdb(indxdb)

    self.nexshot.set('version', 2)
    logger.warning('...Nexus log migration complete')
async def _discoverRanges(self):
    '''
    Go through the slabs and get the starting indices of the sequence in each slab.

    Scans self.dirn for slab directories, in sorted filename order, and populates self._ranges,
    self.firstindx and self.indx from them.  Finishes by calling self._initTailSlab() with the start
    index of the last slab seen (0 when no slab was found).

    Raises:
        s_exc.BadCoreStore: if two slabs overlap, if a slab holds an index lower than the one in its
            filename, or if the resulting first index is greater than the next index.
    '''
    fnstartidx = 0
    lastidx = None
    self._ranges: List[int] = []  # Starting offsets of all the slabs in order
    self.firstindx = 0  # persistently-stored indicator of lowest index
    self.indx = 0  # The next place an add() will go
    lowindx = None

    # Make sure the files are in order
    for fn in sorted(s_common.listdir(self.dirn, glob='*seqn' + '[abcdef01234567890]' * 16 + '.lmdb')):

        if not os.path.isdir(fn):
            logger.warning(f'Found a non-directory {fn} where a directory should be')
            continue

        match = seqnslabre.match(os.path.basename(fn))
        assert match

        # The start index of a slab is encoded as hex in its filename
        newstartidx = int(match.group(1), 16)

        assert newstartidx >= fnstartidx
        fnstartidx = newstartidx

        # Remember the start index of the very first slab on disk
        if lowindx is None:
            lowindx = fnstartidx

        if lastidx is not None:
            if fnstartidx <= lastidx:
                mesg = f'Multislab: overlapping files ({fn}). Previous last index is {lastidx}.'
                raise s_exc.BadCoreStore(mesg=mesg)

            # A gap between slabs is tolerated, but noted
            if fnstartidx != lastidx + 1:
                logger.debug(f'Multislab: gap in indices at {fn}. Previous last index is {lastidx}.')

        async with await s_lmdbslab.Slab.anit(fn, **self.slabopts) as slab:
            # Overwritten for every slab, so the value read from the last slab wins
            self.firstindx = self._getFirstIndx(slab)

            # We use the old name of the sequence to ease migration from the old system
            seqn = slab.getSeqn('nexuslog')

            firstitem = seqn.first()
            if firstitem is None:
                # Empty slab: the next add() goes at the index in its filename
                self.indx = fnstartidx

            else:
                self.indx = seqn.indx
                firstidx = firstitem[0]  # might not match the separately stored first index due to culling

                if firstidx < fnstartidx:
                    # Pass the text as mesg=, like every other BadCoreStore raise, so it is not lost
                    raise s_exc.BadCoreStore(mesg='Multislab: filename inconsistent with contents')

                lastidx = seqn.index() - 1

        self._ranges.append(fnstartidx)

    # An admin might have manually culled by rm'ing old slabs. Update firstidx accordingly.
    if lowindx is not None and lowindx > self.firstindx:
        self.firstindx = lowindx

    if self.firstindx > self.indx:
        raise s_exc.BadCoreStore(mesg='Invalid firstindx value')

    await self._initTailSlab(fnstartidx)