示例#1
0
def dump_rows(outp, fd, store, compress=False, genrows_kwargs=None):
    '''
    Dump all rows from a storage object to a file as msgpacked
    core:save:add:rows tufos, flushing to fd in chunks of DUMP_MEGS megabytes.

    Args:
        outp: Output object used for progress messages.
        fd: Binary file-like object the packed tufos are written to.
        store: Storage object providing getStoreType() / genStoreRows().
        compress (bool): If True, gzip-compress each packed rows payload.
        genrows_kwargs (dict): Extra kwargs passed to genStoreRows(),
            overriding any per-store preset_args.
    '''
    outp.printf('Starting row dump')
    if not genrows_kwargs:
        genrows_kwargs = {}
    total_rows = 0
    pending_rows = 0
    cur_bytes = 0
    bufs = []
    # Per-store presets first, caller-supplied kwargs win on conflict.
    kwargs = preset_args.get(store.getStoreType(), {})
    kwargs.update(genrows_kwargs)
    tick = time.time()

    for rows in store.genStoreRows(**kwargs):
        pending_rows += len(rows)
        total_rows += len(rows)
        tufo = s_tufo.tufo('core:save:add:rows', rows=rows)
        if compress:
            tufo[1]['rows'] = gzip.compress(s_common.msgenpack(rows), 9)
        byts = s_common.msgenpack(tufo)
        bufs.append(byts)
        cur_bytes += len(byts)
        # Flush buffered tufos once the chunk size threshold is reached.
        if cur_bytes > s_axon.megabyte * DUMP_MEGS:
            fd.write(b''.join(bufs))
            outp.printf('Stored {} rows, total {} rows'.format(pending_rows, total_rows))
            bufs = []
            cur_bytes = 0
            pending_rows = 0
    # There still may be rows we need to write out.
    if bufs:
        fd.write(b''.join(bufs))
        outp.printf('Stored {} rows, total {} rows'.format(pending_rows, total_rows))
    tock = time.time()
    outp.printf('Done dumping rows - took {} seconds.'.format(tock - tick))
    outp.printf('Dumped {} rows'.format(total_rows))
示例#2
0
文件: lmdb.py 项目: e2-ibm/synapse
    def _addRows(self, rows):
        '''
        Adds a bunch of rows to the database

        Take care:  this was written this way for performance, in particular when len(rows) is
        large.

        Args:
            rows: iterable of (iden, prop, valu, time) tuples to store.

        Raises:
            s_common.HitCoreLimit: if the primary key space would be exhausted
                or a property name exceeds MAX_PROP_LEN.
            s_common.BadCoreStore: if a primary key collision occurs in the
                main table (should never happen).
        '''
        encs = []

        with self._getTxn(write=True) as txn:
            # Work on a local copy; committed back only after all puts succeed.
            next_pk = self.next_pk

            # First, we encode all the i, p, v, t for all rows
            for i, p, v, t in rows:
                if next_pk > MAX_PK:
                    raise s_common.HitCoreLimit(
                        name='MAX_PK',
                        size=MAX_PK,
                        mesg='Out of primary key values')
                if len(p) > MAX_PROP_LEN:
                    raise s_common.HitCoreLimit(
                        name='MAX_PROP_LEN',
                        size=MAX_PROP_LEN,
                        mesg='Property length too large')
                i_enc = _encIden(i)
                p_enc = _encProp(p)
                v_key_enc = _encValKey(v)
                t_enc = s_common.msgenpack(t)
                pk_enc = _encPk(next_pk)
                row_enc = s_common.msgenpack((i, p, v, t))

                # idx          0      1       2       3       4          5
                encs.append((i_enc, p_enc, row_enc, t_enc, v_key_enc, pk_enc))
                next_pk += 1

            # An iterator of what goes into the main table: key=pk_enc, val=encoded(i, p, v, t)
            kvs = ((x[5], x[2]) for x in encs)

            # Shove it all in at once.  append=True presumes pk encodings sort
            # in insertion order — TODO confirm _encPk is order-preserving.
            consumed, added = txn.cursor(self.rows).putmulti(kvs,
                                                             overwrite=False,
                                                             append=True)
            if consumed != added or consumed != len(encs):
                # Will only fail if record already exists, which should never happen
                raise s_common.BadCoreStore(store='lmdb',
                                            mesg='unexpected pk in DB')

            # Update the indices for all rows
            # iden+prop -> pk
            kvs = ((x[0] + x[1], x[5]) for x in encs)
            txn.cursor(self.index_ip).putmulti(kvs, dupdata=True)
            # prop+valukey+time -> pk
            kvs = ((x[1] + x[4] + x[3], x[5]) for x in encs)
            txn.cursor(self.index_pvt).putmulti(kvs, dupdata=True)
            # prop+time -> pk
            kvs = ((x[1] + x[3], x[5]) for x in encs)
            txn.cursor(self.index_pt).putmulti(kvs, dupdata=True)

            # self.next_pk should be protected from multiple writers. Luckily lmdb write lock does
            # that for us.
            self.next_pk = next_pk
示例#3
0
def dump_blobs(outp, fd, store):
    '''
    Write every key/valu pair in the store's blobstore to fd as
    msgpacked syn:core:blob:set tufos.
    '''
    count = 0
    outp.printf('Dumping blobstore')
    for key in store.getBlobKeys():
        packed_valu = s_common.msgenpack(store.getBlobValu(key))
        tufo = s_tufo.tufo('syn:core:blob:set', key=key, valu=packed_valu)
        fd.write(s_common.msgenpack(tufo))
        count += 1
    outp.printf('Done dumping {} keys from blobstore.'.format(count))
示例#4
0
def _calcFirstLastKeys(prop, valu, mintime, maxtime):
    '''
    Returns the encoded bytes for the start and end keys to the pt or pvt
    index.  Helper function for _{get,del}RowsByProp
    '''
    p_enc = _encProp(prop)
    if valu is None:
        v_key_enc = b''
        v_is_hashed = False
    else:
        v_key_enc = _encValKey(valu)
        v_is_hashed = (v_key_enc[0] == HASH_VAL_MARKER_ENC)

    # No time bounds: a single prefix key covers the whole range.
    if mintime is None and maxtime is None:
        return (p_enc + v_key_enc, None, v_is_hashed, True)

    mintime_enc = s_common.msgenpack(mintime) if mintime is not None else b''
    maxtime_enc = s_common.msgenpack(maxtime) if maxtime is not None else MAX_TIME_ENC

    prefix = p_enc + v_key_enc
    return (prefix + mintime_enc, prefix + maxtime_enc, v_is_hashed, False)
示例#5
0
def hashitem(item):
    '''
    Generate a uniq hash for the JSON compatible primitive data structure.
    '''
    # Normalize first so equivalent structures hash identically.
    packed = s_common.msgenpack(normitem(item))
    return hashlib.md5(packed).hexdigest()
示例#6
0
    def tx(self, mesg):
        '''
        Transmit a mesg tufo ( type, info ) via the socket using msgpack.
        If present this API is safe for use with a socket in a Plex().
        '''
        # Pack and hand off to the byte-level transmit API.
        return self.txbytes(s_common.msgenpack(mesg))
示例#7
0
    def _delBlobValu(self, key):
        # Blob keys are utf-8 encoded and then msgpacked before storage.
        enc_key = s_common.msgenpack(key.encode('utf-8'))
        with self._getTxn(write=True) as txn:  # type: lmdb.Transaction
            valu = txn.pop(enc_key, db=self.blob_store)
            if valu is None:  # pragma: no cover
                # Should be unreachable: deleting a missing key is an error.
                raise s_common.NoSuchName(name=key, mesg='Cannot delete key which is not present in the blobstore.')
        return valu
示例#8
0
def _encValKey(v):
    '''
    Encode a value as used in a key.

    Non-negative numbers are msgpack encoded.  Negative numbers are encoded as a marker, then the
    encoded negative of that value, so that the ordering of the encodings is easily mapped to the
    ordering of the negative numbers.  Strings too long are hashed.  Note that this scheme prevents
    interleaving of value types: all string encodings compare larger than all negative number
    encodings compare larger than all nonnegative encodings.
    '''
    if not s_compat.isint(v):
        # Oversized strings are replaced by a 64-bit hash to bound key size.
        if len(v) >= LARGE_STRING_SIZE:
            return HASH_VAL_MARKER_ENC + s_common.msgenpack(xxhash.xxh64(v).intdigest())
        return STRING_VAL_MARKER_ENC + s_common.msgenpack(v)

    if v >= 0:
        return s_common.msgenpack(v)
    # Negate so larger magnitudes encode larger, matching numeric order.
    return NEGATIVE_VAL_MARKER_ENC + s_common.msgenpack(-v)
示例#9
0
文件: lmdb.py 项目: e2-ibm/synapse
    def _delRowAndIndices(self,
                          txn,
                          pk_enc,
                          i_enc=None,
                          p_enc=None,
                          v_key_enc=None,
                          t_enc=None,
                          delete_ip=True,
                          delete_pvt=True,
                          delete_pt=True,
                          only_if_val=None):
        '''
        Deletes the row corresponding to pk_enc and the indices pointing to it.

        Pre-encoded key components (i_enc, p_enc, v_key_enc, t_enc) may be
        passed by the caller to avoid re-encoding; any still needed for the
        requested index deletions are computed from the stored row.

        Args:
            txn: An open lmdb write transaction.
            pk_enc: Encoded primary key of the row to delete.
            delete_ip/delete_pvt/delete_pt (bool): Which index entries to remove.
            only_if_val: If not None, only delete when the stored valu matches.

        Returns:
            bool: True if the row was deleted, False if only_if_val did not match.

        Raises:
            s_common.BadCoreStore: if the row or an expected index entry is missing.
        '''
        with txn.cursor(self.rows) as cursor:
            if not cursor.set_key(pk_enc):
                raise s_common.BadCoreStore(store='lmdb', mesg='Missing PK')
            i, p, v, t = s_common.msgunpack(cursor.value())

            # Conditional delete: bail out before touching anything.
            if only_if_val is not None and only_if_val != v:
                return False
            cursor.delete()

        # Encode only the key parts actually needed for the requested deletions.
        if delete_ip and i_enc is None:
            i_enc = _encIden(i)

        if p_enc is None:
            p_enc = _encProp(p)

        if delete_pvt and v_key_enc is None:
            v_key_enc = _encValKey(v)

        if (delete_pvt or delete_pt) and t_enc is None:
            t_enc = s_common.msgenpack(t)

        if delete_ip:
            # Delete I-P index entry
            if not txn.delete(i_enc + p_enc, value=pk_enc, db=self.index_ip):
                raise s_common.BadCoreStore(store='lmdb',
                                            mesg='Missing I-P index')

        if delete_pvt:
            # Delete P-V-T index entry
            if not txn.delete(p_enc + v_key_enc + t_enc,
                              value=pk_enc,
                              db=self.index_pvt):
                raise s_common.BadCoreStore(store='lmdb',
                                            mesg='Missing P-V-T index')

        if delete_pt:
            # Delete P-T index entry
            if not txn.delete(p_enc + t_enc, value=pk_enc, db=self.index_pt):
                raise s_common.BadCoreStore(store='lmdb',
                                            mesg='Missing P-T index')

        return True
示例#10
0
    def _txSockMesg(self, sock, mesg):
        # handle the need to send on a socket in the plex
        byts = s_common.msgenpack(mesg)
        # Large payloads are gzip'd when the peer advertises support.
        if len(byts) > 50000 and sock.get('sock:can:gzip'):
            byts = sockgzip(byts)

        with self._plex_lock:

            # we have no backlog!
            if sock.txbuf is None:

                byts = sock._tx_xform(byts)

                try:

                    sent = sock.send(byts)

                except ssl.SSLError as e:
                    # FIXME isolate this filth within link modules.
                    sent = 0
                    # errno 3 is treated as retryable; anything else kills
                    # the socket.  TODO confirm which SSL condition this is.
                    if e.errno != 3:
                        #logger.exception(e)
                        sock.fini()
                        return

                except Exception as e:
                    # Best-effort: any other send failure closes the socket.
                    #logger.exception(e)
                    sock.fini()
                    return

                blen = len(byts)
                if sent == blen:
                    return

                # our send was a bit short...
                # Stash the unsent tail and let the plex loop drain it.
                sock.txbuf = byts[sent:]
                sock.txsize += (blen - sent)
                sock.fire('sock:tx:size', size=sock.txsize)

                self._plex_txsocks.append(sock)
                self._plexWake()
                return

            # so... we have a backlog...
            # Queue behind the pending txbuf; the plex drains in order.
            sock.txque.append(byts)

            sock.txsize += len(byts)
            sock.fire('sock:tx:size', size=sock.txsize)
示例#11
0
    def tx(self, mesg):
        '''
        Transmit a mesg tufo ( type, info ) via the socket using msgpack.
        If present this API is safe for use with a socket in a Plex().
        '''
        # Plex-owned sockets delegate transmission to the plex.
        if self.plex is not None:
            return self.plex._txSockMesg(self, mesg)

        try:
            byts = s_common.msgenpack(mesg)

            # Compress large payloads when the peer supports it.
            if len(byts) > 50000 and self.get('sock:can:gzip'):
                byts = sockgzip(byts)

            self.sendall(byts)
            return True

        except socket.error:
            # Tear down the socket on any transmit failure.
            self.fini()
            return False
示例#12
0
    def add(self, item):
        '''
        Add an item to the persistance storage.

        Returns:
            tuple: (offset, size) of the written record.
        '''
        byts = s_common.msgenpack(item)
        size = len(byts)

        with self.fdlock:

            if self.isfini:
                raise s_common.IsFini()

            # Re-seek to the tail if another code path moved the fd.
            if self.fdoff != self.size:
                self.fd.seek(0, os.SEEK_END)

            off = self.size
            self.fd.write(byts)

            self.size = off + size
            self.fdoff = self.size

            return (off, size)
示例#13
0
def main(argv, outp=None):
    '''
    CLI entry point: parse argv and dump a cortex store to a backup file.

    Returns:
        int: 0 on success, 1 if the output file exists and --force is not set.
    '''
    if outp is None:  # pragma: no cover
        outp = s_output.OutPut()

    opts = makeargpaser().parse_args(argv)

    if not opts.verbose:
        logging.disable(logging.DEBUG)

    # Refuse to clobber an existing backup unless forced.
    if os.path.isfile(opts.output) and not opts.force:
        outp.printf('Cannot overwrite a backup.')
        return 1

    genrows_kwargs = {}
    if opts.extra_args:
        # Extra genStoreRows() kwargs come from a JSON file.
        with open(opts.extra_args, 'rb') as fd:
            genrows_kwargs = json.loads(fd.read().decode())

    storconf = {'rev:storage': bool(opts.revstorage)}

    backup_tufo = gen_backup_tufo(opts)

    # The backup header tufo is written first, then the store contents.
    with open(opts.output, 'wb') as fd:
        fd.write(s_common.msgenpack(backup_tufo))
        with s_cortex.openstore(opts.store, storconf=storconf) as store:
            dump_store(outp,
                       fd,
                       store,
                       compress=opts.compress,
                       dump_blobstore=opts.dump_blobstore,
                       genrows_kwargs=genrows_kwargs)

    outp.printf('Fin')
    return 0
示例#14
0
    def setBlobValu(self, key, valu):
        '''
        Set a value from the blob key/value (KV) store.

        This resides below the tufo storage layer and is Cortex implementation
        dependent. In purely memory backed cortexes, this KV store may not be
        persistent, even if the tufo-layer is persistent, through something
        such as the savefile mechanism.

        Notes:
            Data which is stored in the KV store is msgpacked, so caveats with
            that apply.

        Args:
            key (str): Name of the value to store.
            valu: Value to store in the KV store.

        Returns:
            The input value, unchanged.
        '''
        byts = s_common.msgenpack(valu)
        self._setBlobValu(key, byts)
        # Announce the (already packed) value on the save bus for persistence.
        self.savebus.fire('syn:core:blob:set', key=key, valu=byts)
        return valu
示例#15
0
 def getMeldBytes(self):
     '''
     Return a msgpack packed copy of the MindMeld dictionary.
     '''
     byts = s_common.msgenpack(self.info)
     return byts
示例#16
0
文件: types.py 项目: e2-ibm/synapse
def enMsgB64(item):
    # FIXME find a way to go directly from binary bytes to
    # base64 *string* to avoid the extra decode pass..
    packed = s_common.msgenpack(item)
    return base64.b64encode(packed).decode('utf8')
示例#17
0
# Maximum size in bytes of an encoded primary key: 8 on 64-bit builds, else 4
MAX_PK_BYTES = 8 if sys.maxsize > 2**32 else 4

# Prefix to indicate that a v is a nonnegative value
NONNEGATIVE_VAL_MARKER = 0

# Prefix to indicate that a v is a negative value
NEGATIVE_VAL_MARKER = -1

# Prefix to indicate that a v is a string
STRING_VAL_MARKER = -2

# Prefix to indicate that a v is hash of a string
HASH_VAL_MARKER = -3

# The negative marker encoded
NEGATIVE_VAL_MARKER_ENC = s_common.msgenpack(NEGATIVE_VAL_MARKER)

# The string marker encoded
STRING_VAL_MARKER_ENC = s_common.msgenpack(STRING_VAL_MARKER)

# The hash marker encoded
HASH_VAL_MARKER_ENC = s_common.msgenpack(HASH_VAL_MARKER)

# Number of bytes in a UUID
UUID_SIZE = 16

# An index key can't ever be larger (lexicographically) than this
MAX_INDEX_KEY = b'\xff' * 20

# String vals of this size or larger will be truncated and hashed in index.  What this means is
# that comparison on large string vals require retrieving the row from the main table
示例#18
0
 def _getBlobValu(self, key):
     # Blob keys are utf-8 encoded and then msgpacked before lookup.
     enc_key = s_common.msgenpack(key.encode('utf-8'))
     with self._getTxn() as txn:  # type: lmdb.Transaction
         return txn.get(enc_key, default=None, db=self.blob_store)
示例#19
0
 def _setBlobValu(self, key, valu):
     # Keys are utf-8 encoded then msgpacked; valu is stored as given.
     enc_key = s_common.msgenpack(key.encode('utf-8'))
     with self._getTxn(write=True) as txn:  # type: lmdb.Transaction
         txn.put(enc_key, valu, overwrite=True, db=self.blob_store)
示例#20
0
def sockgzip(byts):
    '''
    Compress byts with zlib and wrap them in a msgpacked sock:gzip tufo
    for transmission on the wire.
    '''
    # NOTE: dropped unused 'blen' local and stale commented-out debug print.
    byts = zlib.compress(byts)
    return s_common.msgenpack(('sock:gzip', {'data': byts}))
示例#21
0
 def savemesg(mesg):
     # Persist the msgpacked mesg to the enclosing scope's fd.
     fd.write(s_common.msgenpack(mesg))