Example #1
def snappy_pack_blob(obj, sep=SEP):
    if obj is None:
        return ""
    c = obj.dtype.char
    if c == "S":
        return "S" + snappy.compress(sep.join(obj))
    return buffer(c + snappy.compress(obj.tobytes()))
Example #2
 def snappy_pack_blob(obj, sep=SEP):
     if obj is None: return ''
     c = obj.dtype.char
     if c == 'S' or c == 'U':
         return b'U' + snappy.compress(sep.join(obj))
     return buffer(
         c.encode('utf-8') + snappy.compress(obj.tobytes(), 'utf8'))
Example #3
def snappy_encode(payload,
                  xerial_compatible=False,
                  xerial_blocksize=32 * 1024):
    """Encodes the given data with snappy compression.

    If xerial_compatible is set then the stream is encoded in a fashion
    compatible with the xerial snappy library.

    The block size (xerial_blocksize) controls how frequently the blocking
    occurs. 32k is the default in the xerial library.

    The format winds up being:


        +-------------+------------+--------------+------------+--------------+
        |   Header    | Block1 len | Block1 data  | Blockn len | Blockn data  |
        +-------------+------------+--------------+------------+--------------+
        |  16 bytes   |  BE int32  | snappy bytes |  BE int32  | snappy bytes |
        +-------------+------------+--------------+------------+--------------+


    It is important to note that the blocksize is the amount of uncompressed
    data presented to snappy at each block, whereas the blocklen is the number
    of bytes that will be present in the stream; so the length will always be
    <= blocksize.

    """

    if not has_snappy():
        raise NotImplementedError("Snappy codec is not available")

    if xerial_compatible:

        def _chunker():
            for i in xrange(0, len(payload), xerial_blocksize):
                yield payload[i:i + xerial_blocksize]

        out = BytesIO()

        header = b''.join([
            struct.pack('!' + fmt, dat)
            for fmt, dat in zip(_XERIAL_V1_FORMAT, _XERIAL_V1_HEADER)
        ])

        out.write(header)
        for chunk in _chunker():
            block = snappy.compress(chunk)
            block_size = len(block)
            out.write(struct.pack('!i', block_size))
            out.write(block)

        out.seek(0)
        return out.read()

    else:
        return snappy.compress(payload)
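For illustration, a minimal consumer-side sketch (not part of the library) of how the xerial-framed stream produced by snappy_encode(..., xerial_compatible=True) could be walked: skip the 16-byte header, then repeatedly read a big-endian int32 block length followed by that many snappy bytes. The helper name xerial_decode and the assumption that the header has already been validated are mine.

import struct

import snappy


def xerial_decode(stream, header_len=16):
    # Skip the 16-byte xerial header, then decode each length-prefixed block.
    out = []
    pos = header_len
    while pos < len(stream):
        (block_len,) = struct.unpack('!i', stream[pos:pos + 4])
        pos += 4
        out.append(snappy.decompress(stream[pos:pos + block_len]))
        pos += block_len
    return b''.join(out)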
Example #4
File: views_test.py Project: ymc/hue
def test_view_snappy_compressed():
  if not snappy_installed():
    raise SkipTest
  import snappy

  cluster = pseudo_hdfs4.shared_cluster()
  finish = []
  try:
    c = make_logged_in_client()
    cluster.fs.setuser(cluster.superuser)
    if cluster.fs.isdir('/tmp/test-snappy-filebrowser'):
      cluster.fs.rmtree('/tmp/test-snappy-filebrowser')

    cluster.fs.mkdir('/tmp/test-snappy-avro-filebrowser/')

    f = cluster.fs.open('/tmp/test-snappy-filebrowser/test-view.snappy', "w")
    f.write(snappy.compress('This is a test of the emergency broadcasting system.'))
    f.close()

    f = cluster.fs.open('/tmp/test-snappy-filebrowser/test-view.stillsnappy', "w")
    f.write(snappy.compress('The broadcasters of your area in voluntary cooperation with the FCC and other authorities.'))
    f.close()

    f = cluster.fs.open('/tmp/test-snappy-filebrowser/test-view.notsnappy', "w")
    f.write('foobar')
    f.close()

    # Snappy compressed fail
    response = c.get('/filebrowser/view/tmp/test-snappy-filebrowser/test-view.notsnappy?compression=snappy')
    assert_true('Failed to decompress' in response.context['message'], response)

    # Snappy compressed succeed
    response = c.get('/filebrowser/view/tmp/test-snappy-filebrowser/test-view.snappy')
    assert_equal('snappy', response.context['view']['compression'])
    assert_equal(response.context['view']['contents'], 'This is a test of the emergency broadcasting system.', response)

    # Snappy compressed succeed
    response = c.get('/filebrowser/view/tmp/test-snappy-filebrowser/test-view.stillsnappy')
    assert_equal('snappy', response.context['view']['compression'])
    assert_equal(response.context['view']['contents'], 'The broadcasters of your area in voluntary cooperation with the FCC and other authorities.', response)

    # Largest snappy compressed file
    finish.append( MAX_SNAPPY_DECOMPRESSION_SIZE.set_for_testing(1) )
    response = c.get('/filebrowser/view/tmp/test-snappy-filebrowser/test-view.stillsnappy?compression=snappy')
    assert_true('File size is greater than allowed max snappy decompression size of 1' in response.context['message'], response)

  finally:
    for done in finish:
      done()
    try:
      cluster.fs.rmtree('/test-snappy-avro-filebrowser/')
    except:
      pass      # Don't let cleanup errors mask earlier failures
Example #5
def transmit(result, sock):
    pickler = pickle.Pickler(sock)
    cols = list(result.keys())
    pickler.dump(cols)

    for col in cols:
        if (result[col].dtype == object):
            colz = snappy.compress(pickle.dumps(result[col]))
        else:
            colz = snappy.compress(result[col])
        pickler.dump(result[col].dtype)
        pickler.dump(colz)
Example #6
    def record_question(self, post: dict):

        # update set of users_ids (users with activity)
        for user_id in post.get("users_ids"):
            self._all_users_ids.add(user_id)

        # add this postId to the ordered list of questions sorted by score
        self.pipe.zadd(self.questions_key(),
                       mapping={post["Id"]: post["Score"]},
                       nx=True)

        # Add this question's PostId to the ordered questions sets of all its tags
        for tag in post.get("Tags", []):
            self.pipe.zadd(self.tag_key(tag),
                           mapping={post["Id"]: post["Score"]},
                           nx=True)

        # store user ids as int (the common case) to save some space in redis.
        # names stored as str thus belong to deleted users; this prevents deleted
        # users with a name such as "3200" from being considered User#3200
        if post.get("OwnerUserId"):
            post["OwnerName"] = int(post["OwnerUserId"])

        # store question details
        self.pipe.setnx(
            self.question_key(post["Id"]),
            snappy.compress(
                json.dumps((
                    post["CreationTimestamp"],
                    post["OwnerName"],
                    post["has_accepted"],
                    post["nb_answers"],
                    # Tag ID can be None in the event a Tag existed and was not used
                    # but got used first during the dumping process, after the Tags
                    # were dumped but before questions were fully dumped.
                    # SO Tag `imac` in 2021-06 dumps for instance
                    [
                        self.get_tag_id(tag) for tag in post.get("Tags", [])
                        if self.get_tag_id(tag)
                    ],
                ))),
        )

        # record question's meta: ID: title, excerpt for use in home and tag pages
        self.pipe.set(
            self.question_details_key(post["Id"]),
            snappy.compress(
                json.dumps((post["Title"], get_text(post["Body"],
                                                    strip_at=250)))),
        )

        self.bump_seen(4 + len(post.get("Tags", [])))
        self.commit_maybe()
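A hedged read-side sketch of how a question stored by record_question could be loaded back: fetch the key, snappy-decompress, then json.loads. The method name get_question and the plain client attribute self.redis are assumptions for illustration; the key helper mirrors the one used above.

import json

import snappy


def get_question(self, post_id):
    # Reverse of the setnx above: decompress and decode the stored tuple.
    blob = self.redis.get(self.question_key(post_id))  # assumed plain client
    if blob is None:
        return None
    creation_ts, owner, has_accepted, nb_answers, tag_ids = json.loads(
        snappy.decompress(blob))
    return creation_ts, owner, has_accepted, nb_answers, tag_ids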
Example #8
def snappy_encode(payload, xerial_compatible=False, xerial_blocksize=32*1024):
    """Encodes the given data with snappy compression.

    If xerial_compatible is set then the stream is encoded in a fashion
    compatible with the xerial snappy library.

    The block size (xerial_blocksize) controls how frequently the blocking
    occurs. 32k is the default in the xerial library.

    The format winds up being:


        +-------------+------------+--------------+------------+--------------+
        |   Header    | Block1 len | Block1 data  | Blockn len | Blockn data  |
        +-------------+------------+--------------+------------+--------------+
        |  16 bytes   |  BE int32  | snappy bytes |  BE int32  | snappy bytes |
        +-------------+------------+--------------+------------+--------------+


    It is important to note that the blocksize is the amount of uncompressed
    data presented to snappy at each block, whereas the blocklen is the number
    of bytes that will be present in the stream; so the length will always be
    <= blocksize.

    """

    if not has_snappy():
        raise NotImplementedError("Snappy codec is not available")

    if xerial_compatible:
        def _chunker():
            for i in xrange(0, len(payload), xerial_blocksize):
                yield payload[i:i+xerial_blocksize]

        out = BytesIO()

        header = b''.join([struct.pack('!' + fmt, dat) for fmt, dat
                           in zip(_XERIAL_V1_FORMAT, _XERIAL_V1_HEADER)])

        out.write(header)
        for chunk in _chunker():
            block = snappy.compress(chunk)
            block_size = len(block)
            out.write(struct.pack('!i', block_size))
            out.write(block)

        out.seek(0)
        return out.read()

    else:
        return snappy.compress(payload)
Example #9
def snappy_encode(payload,
                  xerial_compatible=False,
                  xerial_blocksize=32 * 1024):
    """
    Compress the given data with the Snappy algorithm.

    :param bytes payload: Data to compress.
    :param bool xerial_compatible:
        If set then the stream is broken into length-prefixed blocks in
        a fashion compatible with the xerial snappy library.

        The format winds up being::

            +-------------+------------+--------------+------------+--------------+
            |   Header    | Block1_len | Block1 data  | BlockN len | BlockN data  |
            |-------------+------------+--------------+------------+--------------|
            |  16 bytes   |  BE int32  | snappy bytes |  BE int32  | snappy bytes |
            +-------------+------------+--------------+------------+--------------+

    :param int xerial_blocksize:
        Number of bytes per chunk to independently Snappy encode. 32k is the
        default in the xerial library.

    :returns: Compressed bytes.
    :rtype: :class:`bytes`
    """
    if not has_snappy():  # FIXME This should be static, not checked every call.
        raise NotImplementedError("Snappy codec is not available")

    if xerial_compatible:

        def _chunker():
            for i in range(0, len(payload), xerial_blocksize):
                yield payload[i:i + xerial_blocksize]

        out = BytesIO()
        out.write(_XERIAL_HEADER)

        for chunk in _chunker():
            block = snappy.compress(chunk)
            out.write(struct.pack('!i', len(block)))
            out.write(block)

        out.seek(0)
        return out.read()

    else:
        return snappy.compress(payload)
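afkak writes a precomputed _XERIAL_HEADER rather than packing it on every call. A sketch of how such a 16-byte header could be built from the conventional xerial magic values; the exact constants follow the kafka-python convention and are an assumption about afkak's internals.

import struct

_XERIAL_V1_FORMAT = 'bccccccBii'
_XERIAL_V1_HEADER = (-126, b'S', b'N', b'A', b'P', b'P', b'Y', 0, 1, 1)

# One signed magic byte, the ASCII string "SNAPPY", a trailing zero completing
# the 8-byte magic, then the default and min-compat versions (both 1) as
# big-endian int32s, for 16 bytes total.
_XERIAL_HEADER = b''.join(
    struct.pack('!' + fmt, dat)
    for fmt, dat in zip(_XERIAL_V1_FORMAT, _XERIAL_V1_HEADER))
assert len(_XERIAL_HEADER) == 16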
Example #10
def encode_snappy(buff, xerial_compatible=False, xerial_blocksize=32 * 1024):
    """Encode a buffer using snappy

    If xerial_compatible is set, the buffer is encoded in a fashion compatible
    with the xerial snappy library.

    The block size (xerial_blocksize) controls how frequently the blocking
    occurs. 32k is the default in the xerial library.

    The format is as follows:
    +-------------+------------+--------------+------------+--------------+
    |   Header    | Block1 len | Block1 data  | Blockn len | Blockn data  |
    |-------------+------------+--------------+------------+--------------|
    |  16 bytes   |  BE int32  | snappy bytes |  BE int32  | snappy bytes |
    +-------------+------------+--------------+------------+--------------+

    It is important to note that `blocksize` is the amount of uncompressed
    data presented to snappy at each block, whereas `blocklen` is the
    number of bytes that will be present in the stream.

    Adapted from kafka-python
    https://github.com/mumrah/kafka-python/pull/127/files
    """
    #snappy segfaults if it gets a read-only buffer on PyPy
    if IS_PYPY or PY3:
        buff = bytes(buff)
    if snappy is None:
        raise ImportError("Please install python-snappy")
    if xerial_compatible:

        def _chunker():
            for i in range(0, len(buff), xerial_blocksize):
                yield buff[i:i + xerial_blocksize]

        out = BytesIO()
        full_data = list(zip(_XERIAL_V1_FORMAT, _XERIAL_V1_HEADER))
        header = b''.join(
            [struct.pack('!' + fmt, dat) for fmt, dat in full_data])

        out.write(header)
        for chunk in _chunker():
            block = snappy.compress(chunk)
            block_size = len(block)
            out.write(struct.pack('!i', block_size))
            out.write(block)
        out.seek(0)
        return out.read()
    else:
        return snappy.compress(buff)
Example #11
def encode_snappy(buff, xerial_compatible=False, xerial_blocksize=32 * 1024):
    """Encode a buffer using snappy

    If xerial_compatible is set, the buffer is encoded in a fashion compatible
    with the xerial snappy library.

    The block size (xerial_blocksize) controls how frequently the blocking
    occurs. 32k is the default in the xerial library.

    The format is as follows:
    +-------------+------------+--------------+------------+--------------+
    |   Header    | Block1 len | Block1 data  | Blockn len | Blockn data  |
    |-------------+------------+--------------+------------+--------------|
    |  16 bytes   |  BE int32  | snappy bytes |  BE int32  | snappy bytes |
    +-------------+------------+--------------+------------+--------------+

    It is important to note that `blocksize` is the amount of uncompressed
    data presented to snappy at each block, whereas `blocklen` is the
    number of bytes that will be present in the stream.

    Adapted from kafka-python
    https://github.com/mumrah/kafka-python/pull/127/files
    """
    #snappy segfaults if it gets a read-only buffer on PyPy
    if IS_PYPY or PY3:
        buff = bytes(buff)
    if snappy is None:
        raise ImportError("Please install python-snappy")
    if xerial_compatible:
        def _chunker():
            for i in range(0, len(buff), xerial_blocksize):
                yield buff[i:i + xerial_blocksize]
        out = BytesIO()
        full_data = list(zip(_XERIAL_V1_FORMAT, _XERIAL_V1_HEADER))
        header = b''.join(
            [struct.pack('!' + fmt, dat) for fmt, dat in full_data])

        out.write(header)
        for chunk in _chunker():
            block = snappy.compress(chunk)
            block_size = len(block)
            out.write(struct.pack('!i', block_size))
            out.write(block)
        out.seek(0)
        return out.read()
    else:
        return snappy.compress(buff)
Example #12
def update_post(username, slug):
    user = current_user
    content = request.form.get('content', type=str)
    cursor = request.form.get('cursor', type=int)

    if content is not None:
        post = user.posts.filter_by(slug=slug).first()
        if post:
            post.cursor = len(content) if not cursor else cursor
            post.modified_timestamp = datetime.utcnow()
            
            # Get meta
            r = regex.compile(r'<<((?:(?>[^<>]+)|<(?!<)|>(?!>))*?)>>', regex.I | regex.S)
            post.meta = json.dumps(regex.findall(r, content))
            
            # Encrypt
            half_key = session[generate_hash(user.user_key_salt)]
            key = xor_keys(half_key, app.config['MASTER_KEY'])
            content = snappy.compress(content)
            content = AES_encrypt(key, user.username, content)
            post.content = content
            
            db.session.add(post)
            db.session.commit()
            return jsonify(error=None)
        return jsonify(error="Not found")
    elif cursor is not None:
        post = user.posts.filter_by(slug=slug).first()
        if post:
            post.cursor = cursor
            db.session.add(post)
            db.session.commit()
            return jsonify(error=None)
        return jsonify(error="Not found")
    return jsonify(error="Invalid parameters")
Example #13
def write_key(ds, kind, id, data_path):
    key = ds.key(kind, id)
    entity = datastore.Entity(
        key=key,
        exclude_from_indexes=['Value'])

    with open(data_path) as f:
        data = json.load(f)

    payload = {
        'LastModified': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        'SchemaVersion': '',
        'DataType': data['dataType'],
        'Season': data['season']['name'],
        'Sport': data['sport'] if 'sport' in data else '',
        'League': data['league']['alias'],
        'TeamId': str(data['team']['id']),
        'PlayerId': data['player']['id'] if 'player' in data else '',
        'EventId': data['eventId'] if 'eventId' in data else '',
        'EventDate': data['eventDate'] if 'eventDate' in data else '',
        'EventType': data['eventType'] if 'eventType' in data else '',
        'Value': snappy.compress(msgpack.packb(data))
    }
    print payload

    entity.update(payload)
    ds.put(entity)
Example #14
    def read(self, _):
        # Stop copying when the bevy is full.
        if self.chunk_count_in_bevy >= self.owner.chunks_per_segment:
            return ""

        chunk = self.stream.read(self.owner.chunk_size)
        if not chunk:
            return ""

        self.size += len(chunk)

        if self.owner.compression == lexicon.AFF4_IMAGE_COMPRESSION_ZLIB:
            compressed_chunk = zlib.compress(chunk)
        elif (snappy and self.owner.compression
              == lexicon.AFF4_IMAGE_COMPRESSION_SNAPPY):
            compressed_chunk = snappy.compress(chunk)
        elif self.owner.compression == lexicon.AFF4_IMAGE_COMPRESSION_STORED:
            compressed_chunk = chunk

        compressedLen = len(compressed_chunk)
        self.chunk_count_in_bevy += 1

        if compressedLen < self.owner.chunk_size - 16:
            self.bevy_index.append((self.bevy_length, compressedLen))
            self.bevy_length += compressedLen
            return compressed_chunk
        else:
            self.bevy_index.append((self.bevy_length, self.owner.chunk_size))
            self.bevy_length += self.owner.chunk_size
            return chunk
Example #15
    def FlushChunk(self, chunk):
        if len(chunk) == 0:
            return

        bevy_offset = self.bevy_length

        if self.compression == lexicon.AFF4_IMAGE_COMPRESSION_ZLIB:
            compressed_chunk = zlib.compress(chunk)
        elif (snappy
              and self.compression == lexicon.AFF4_IMAGE_COMPRESSION_SNAPPY):
            compressed_chunk = snappy.compress(chunk)
        elif self.compression == lexicon.AFF4_IMAGE_COMPRESSION_STORED:
            compressed_chunk = chunk

        compressedLen = len(compressed_chunk)

        if compressedLen < self.chunk_size - 16:
            self.bevy_index.append((bevy_offset, compressedLen))
            self.bevy.append(compressed_chunk)
            self.bevy_length += compressedLen
        else:
            self.bevy_index.append((bevy_offset, self.chunk_size))
            self.bevy.append(chunk)
            self.bevy_length += self.chunk_size

        #self.bevy_index.append((bevy_offset, len(compressed_chunk)))
        #self.bevy.append(compressed_chunk)
        #self.bevy_length += len(compressed_chunk)
        self.chunk_count_in_bevy += 1

        #self.buffer = chunk[self.chunk_size:]
        if self.chunk_count_in_bevy >= self.chunks_per_segment:
            self._FlushBevy()
Example #16
File: main.py Project: zgame/PythonCode
def print_hi(name):
    # Use a breakpoint in the code line below to debug your script.
    print(f'Hi, {name}')  # Press Ctrl+F8 to toggle the breakpoint.

    print('-------------zlib---------------')
    message = 'hello'
    compressed = zlib.compress(message.encode())
    decompressed = zlib.decompress(compressed)

    print('original:', repr(message))
    print_hex(compressed)
    print('decompressed:', repr(decompressed))
    # with open('1.txt', 'w', encoding='utf-8') as f:  # python3
    #     f.write(compressed.decode(encoding="utf-8"))

    print('-------------gzip---------------')
    f_in = open("1.txt", "rb")  # open the input file
    f_out = gzip.open("data.txt.gz", "wb")  # create a gzip-compressed output file
    f_out.writelines(f_in)
    f_out.close()
    f_in.close()

    print('-------------snappy---------------')
    compressed = snappy.compress(message)
    print_hex(compressed)
    decompressed = snappy.uncompress(compressed)
    print('uncompressed:', repr(decompressed))
Example #17
File: endpoint.py Project: lithp/lahja
    def _compress_event(self, event: BaseEvent) -> Union[BaseEvent, bytes]:
        if self.has_snappy_support:
            import snappy

            return cast(bytes, snappy.compress(pickle.dumps(event)))
        else:
            return event
Example #18
    def write_header(self):

        if not self.fp:
            self.fp = open(self.filename, 'w+b')

        fp = self.fp

        # header layout:
        # 5I: major, minor, part0_size, n_properties, n_sections
        # 64s * n_sections
        # packed properties

        n_sections = len(self.section_names)
        part0_size = 20 + 64 * n_sections

        props_size = self._measure_properties_pack_size(self.properties)
        header_len = part0_size + props_size
        header_buf = bytearray(header_len)

        n_properties = len(self.properties)

        struct.pack_into('5I', header_buf, 0, self.major, self.minor,
                         part0_size, n_properties, n_sections)

        for n in range(0, n_sections):
            off = 20 + n * 64
            struct.pack_into('64s', header_buf, off,
                             self.section_names[n].encode('utf-8'))

        self._pack_properties(self.properties, header_buf, part0_size)

        fp.seek(0)

        # XXX: it's really pretty surprising that Python has no zero-copy way of
        # casting a bytearray to bytes or a readonly memoryview and you can't
        # fake it by subclassing the builtin memoryview class. We have to pass
        # in a read-only bytes like object here which means we have to incur a
        # redundant copy :(
        compressed = snappy.compress(bytes(header_buf))
        compressed_len = len(compressed)

        if compressed_len > self.frames_offset:
            # If this is a new file then there's no inherent limit to the
            # size of the header, but otherwise we can't grow beyond the
            # space allocated when the file was first written:
            if self._is_empty:
                while compressed_len > self.frames_offset:
                    self.frames_offset *= 2
            else:
                raise Exception("header too long")

        fp.write(
            struct.pack('4sII', 'P4cK'.encode('ascii'), self.frames_offset,
                        compressed_len))

        fp.write(compressed)

        if fp.tell() < self.frames_offset:
            fp.seek(self.frames_offset - 1)
            fp.write(b' ')
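A hedged counterpart sketch for reading the header written above: the file begins with the packed 'P4cK' magic, the frames offset and the compressed header length, followed by that many bytes of snappy-compressed header data. The reader function itself is an assumption, not code from this project.

import struct

import snappy


def read_header_blob(fp):
    # Mirror of the final writes in write_header: '4sII' magic/offset/length,
    # then `compressed_len` bytes of snappy-compressed header.
    fp.seek(0)
    magic, frames_offset, compressed_len = struct.unpack('4sII', fp.read(12))
    assert magic == b'P4cK'
    header_buf = snappy.decompress(fp.read(compressed_len))
    return frames_offset, header_buf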
Example #19
 def compress(self):
     if self._compressed != None:
         return self._compressed
     if self._decompressed == None:
         return None
     self._compressed = snappy.compress(self._decompressed)
     return self._compressed
Example #20
    def add_image(self, ts, img):
        """ Add depth image to archive """

        # write time stamp to file
        self._fp_ts.write('%f\n' % ts)
        self._fp_ts.flush()
        # add frame to binary file

        #if img.dtype == np.float32:
        # Is simulated data
        #mask = np.isnan(img)
        #if mask.any():
        # In simulation, the background has NaN depth values.
        # We replace them with 0 m, similar to what the Kinect V1 did.
        # See https://msdn.microsoft.com/en-us/library/jj131028.aspx.
        #rospy.logdebug("There was at least one NaN in the depth image. " +
        #              "I replaced all occurrences with 0.0 m.")
        #img.flags.writeable = True
        #img[mask] = 0.0
        # We now map the float values in meters to uint16 values in mm
        # as provided by the libfreenect2 library and Kinect SDK.
        #img *= 1000.0
        #img = img.astype(np.uint16, copy=False)
        #assert img.dtype == np.uint16

        # compress image with snappy
        img_comp = snappy.compress(img)
        # write number of bytes of compressed image
        nr_bytes = struct.pack('<L', len(img_comp))
        self._fp_d.write(nr_bytes)
        # write compressed image
        self._fp_d.write(img_comp)
        self._fp_d.flush()
Example #21
File: client.py Project: benhe119/gate
    def process_raw(self,
                    evidence_uuid,
                    pipeline,
                    data,
                    raw,
                    autosave=True,
                    wait_for_result=False):
        """Process the raw data using the given pipeline.
		@param evidence_uuid: Evidence id
		@param pipeline: Name of pipeline to use
		@param data: Current information about the stream
		@param raw: Raw data to process
		@param autosave: Auto passes result onto the engine
		@param wait_for_result: Wait to get the resulting data
		@returns: result data
		"""
        raw = base64.b64encode(snappy.compress(raw))
        if wait_for_result:
            return self._client.call({},
                                     'process_raw',
                                     evidence_uuid=evidence_uuid,
                                     pipeline=pipeline,
                                     data=data,
                                     raw=raw,
                                     return_result=wait_for_result,
                                     autosave=autosave)
        self._client.cast({},
                          'process_raw',
                          evidence_uuid=evidence_uuid,
                          pipeline=pipeline,
                          data=data,
                          raw=raw,
                          return_result=wait_for_result,
                          autosave=autosave)
Example #22
    def snappy_write_block(encoder, block_bytes):
        """Write block in "snappy" codec."""
        data = snappy.compress(block_bytes)

        encoder.write_long(len(data) + 4)  # for CRC
        encoder._fo.write(data)
        encoder.write_crc32(block_bytes)
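A hedged reader-side sketch matching this writer: the long holds len(compressed data) + 4, followed by the snappy bytes and a 4-byte big-endian CRC32 of the uncompressed block. Decoding the Avro long itself is left out; `raw` and `length` are assumed to have been read already.

import struct
import zlib

import snappy


def snappy_read_block(raw, length):
    data, crc = raw[:length - 4], raw[length - 4:length]
    block_bytes = snappy.decompress(data)
    # write_crc32 above records the CRC of the uncompressed bytes.
    assert struct.unpack('>I', crc)[0] == (zlib.crc32(block_bytes) & 0xFFFFFFFF)
    return block_bytes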
Example #23
File: snappy.py Project: wglass/kiel
def compress(data):
    """
    Compresses given data via the snappy algorithm.

    The result is preceded with a header containing the string 'SNAPPY' and the
    default and min-compat versions (both ``1``).

    The block size for the compression is hard-coded at 32kb.

    If ``python-snappy`` is not installed a ``RuntimeError`` is raised.
    """
    if not snappy_available:
        raise RuntimeError("Snappy compression unavailable.")

    buff = BytesIO()
    buff.write(raw_header)

    for block_num in range(0, len(data), BLOCK_SIZE):
        block = data[block_num:block_num + BLOCK_SIZE]
        compressed = snappy.compress(block)

        buff.write(struct.pack("!i", len(compressed)))
        buff.write(compressed)

    result = buff.getvalue()

    buff.close()

    return result
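The module's decompress direction is not shown here; a minimal sketch, assuming the same raw_header prefix and length-prefixed blocks (the helper and the header-skipping detail are assumptions, not kiel's actual code):

import struct
from io import BytesIO

import snappy


def decompress(data):
    # Skip the 'SNAPPY' header written by compress(), then walk the blocks.
    buff = BytesIO(data)
    buff.seek(len(raw_header))
    chunks = []
    while True:
        prefix = buff.read(4)
        if not prefix:
            break
        (length,) = struct.unpack("!i", prefix)
        chunks.append(snappy.decompress(buff.read(length)))
    return b"".join(chunks)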
Example #24
def _pack_msgpack_snappy(obj):
    # print "pack", obj
    tmp = msgpack.dumps(obj, encoding='utf-8')
    if len(tmp) > 1000:
        return b'S' + snappy.compress(tmp)
    else:
        return b'\0' + tmp
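The receive side presumably branches on that one-byte prefix; a hedged counterpart sketch (the function name is assumed):

import msgpack

import snappy


def _unpack_msgpack_snappy(buf):
    # b'S' marks a snappy-compressed msgpack payload, b'\0' an uncompressed one.
    if buf[:1] == b'S':
        tmp = snappy.uncompress(buf[1:])
    else:
        tmp = buf[1:]
    return msgpack.loads(tmp, raw=False)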
Example #25
def compress(data: bytes):
    """
	Compresses data with default encoding UTF-8. We currently use Google's Snappy (it's fast).
	:param data: bytes that are going to be compressed.
	:return: bytes compressed by algorithm.
	"""
    return snappy.compress(data, 'utf-8')
Example #26
 def encode(self, serializer: AbstractSerializer, compress: bool = False) \
           -> RawHeaderBody:
     metadata = b''
     if self.metadata is not None:
         metadata = self.metadata.encode()
     header = {
         'type': int(self.msgtype),
         'meth': self.method,
         'okey': self.order_key,
         'seq': self.client_seq_id,
         'zip': compress,
     }
     serialized_header: bytes = mpackb(header)
     body: Optional[bytes]
     if self.msgtype in (RPCMessageTypes.FUNCTION, RPCMessageTypes.RESULT):
         body = serializer(self.body)
     else:
         body = self.body
     data = {
         'meta': metadata,
         'body': body,
     }
     serialized_data: bytes = mpackb(data)
     if compress:
         if not has_snappy:
             raise ConfigurationError('python-snappy is not installed')
         serialized_data = snappy.compress(serialized_data)
     return RawHeaderBody(
         serialized_header,
         serialized_data,
         self.peer_id,
     )
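A hedged sketch of the matching decode step: unpack the header, honour its 'zip' flag, then unpack the body. msgpack.unpackb stands in for whatever unpack counterpart of mpackb the codebase uses, and RawHeaderBody is assumed to behave like a (header, body, peer_id) tuple.

import msgpack

import snappy


def decode_raw(raw):
    serialized_header, serialized_data = raw[0], raw[1]
    header = msgpack.unpackb(serialized_header, raw=False)
    if header['zip']:
        serialized_data = snappy.decompress(serialized_data)
    data = msgpack.unpackb(serialized_data, raw=False)
    return header, data['meta'], data['body']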
Example #27
def _pack(obj) :
#     print "PACK", obj
    tmp = msgpack.dumps(obj)
    if len(tmp) > 1000:
        return 'S'  + snappy.compress(tmp)
    else:
        return '\0' + tmp
Example #29
        def new_fn(self, arg, *args, **kw):
            try:
                date = arg.name
            except AttributeError:
                date = arg

            if validate_date:
                assert DATE_RE.match(date), date

            fnam = hashlib.md5((source_id + date).encode('utf-8')).hexdigest()
            cache_path = get_cache_dir() / source_id / f'{fnam}.json'

            if not cache_path.parent.exists():
                cache_path.parent.mkdir()

            if cache_path.exists():
                with open(cache_path, 'rb') as f:
                    datapoints = json.loads(snappy.decompress(f.read()))
                    return [_DataPoint(*i) for i in datapoints]
            else:
                datapoints = fn(
                    self, date, *args,
                    **kw)  # WARNING: args/kw aren't taken into account here!!
                json_data = json.dumps([tuple(i)
                                        for i in datapoints]).encode('utf-8')
                json_data = snappy.compress(json_data)

                with open(cache_path, 'wb') as f:
                    f.write(json_data)
                return datapoints
Example #30
    def snappy_write_block(fo, block_bytes):
        """Write block in "snappy" codec."""
        data = snappy.compress(block_bytes)

        write_long(fo, len(data) + 4)  # for CRC
        fo.write(data)
        write_crc32(fo, block_bytes)
Example #31
def write_key(ds, kind, id, data_path):
    key = ds.key(kind, id)
    entity = datastore.Entity(key=key, exclude_from_indexes=['Value'])

    with open(data_path) as f:
        data = json.load(f)

    payload = {
        'LastModified': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        'SchemaVersion': '',
        'DataType': data['dataType'],
        'Season': data['season']['name'],
        'Sport': data['sport'] if 'sport' in data else '',
        'League': data['league']['alias'],
        'TeamId': str(data['team']['id']),
        'PlayerId': data['player']['id'] if 'player' in data else '',
        'EventId': data['eventId'] if 'eventId' in data else '',
        'EventDate': data['eventDate'] if 'eventDate' in data else '',
        'EventType': data['eventType'] if 'eventType' in data else '',
        'Value': snappy.compress(msgpack.packb(data))
    }
    print payload

    entity.update(payload)
    ds.put(entity)
Example #32
    def snappy_write_block(fo, block_bytes):
        '''Write block in "snappy" codec.'''
        data = snappy.compress(block_bytes)

        write_long(fo, len(data) + 4)  # for CRC
        fo.write(data)
        write_crc32(fo, block_bytes)
Example #33
File: jrpc_py.py Project: raycool/vnpy
def _pack_msgpack_snappy(obj):
    # print "pack", obj
    tmp = msgpack.dumps(obj, encoding='utf-8')
    if len(tmp) > 1000:
        return b'S' + snappy.compress(tmp)
    else:
        return b'\0' + tmp
Example #34
    def run(self):
        while True:
            self.__RunningFlag = False

            try:
                PartitionID_ = self.__PendingTaskQueue.get(timeout=0.05)
            except:
                if self.__stop.is_set():
                    break
                else:
                    continue
            self.__RunningFlag = True
            UpdatedVertex = self.__ControlInfo['CalcFunc'](PartitionID_,
                                                           self.__DataInfo,
                                                           self.__GraphInfo,
                                                           self.__Dtype_All)
            start_id = PartitionID_ * self.__GraphInfo['VertexPerPartition']
            end_id = (PartitionID_ + 1) * \
                self.__GraphInfo['VertexPerPartition']
            UpdatedVertex = UpdatedVertex - \
                self.__DataInfo['VertexData'][start_id:end_id]
            UpdatedVertex[np.where(
                abs(UpdatedVertex) <= self.__ControlInfo['FilterThreshold']
            )] = 0
            UpdatedVertex = UpdatedVertex.astype(
                self.__Dtype_All['VertexData'])

            Tmp_UpdatedData = np.append(UpdatedVertex, PartitionID_)
            Tmp_UpdatedData = Tmp_UpdatedData.astype(
                self.__Dtype_All['VertexData'])
            Str_UpdatedData = Tmp_UpdatedData.tostring()
            Str_UpdatedData = snappy.compress(Str_UpdatedData)
            QueueUpdatedVertex.put(Str_UpdatedData)
Example #36
    def test_compression(self):
        # test that we can add compressed chunks
        compressor = snappy.StreamCompressor()
        data = b"\0" * 50
        compressed_data = snappy.compress(data)
        crc = struct.pack("<L", snappy._masked_crc32c(data))
        self.assertEqual(crc, b"\x8f)H\xbd")
        self.assertEqual(len(compressed_data), 6)
        self.assertEqual(compressor.add_chunk(data, compress=True),
                         b"\xff\x06\x00\x00sNaPpY"
                         b"\x00\x0a\x00\x00" + crc + compressed_data)

        # test that we can add uncompressed chunks
        data = b"\x01" * 50
        crc = struct.pack("<L", snappy._masked_crc32c(data))
        self.assertEqual(crc, b"\xb2\x14)\x8a")
        self.assertEqual(compressor.add_chunk(data, compress=False),
                         b"\x01\x36\x00\x00" + crc + data)

        # test that we can add more data than will fit in one chunk
        data = b"\x01" * (snappy._CHUNK_MAX * 2 - 5)
        crc1 = struct.pack("<L",
                snappy._masked_crc32c(data[:snappy._CHUNK_MAX]))
        self.assertEqual(crc1, b"h#6\x8e")
        crc2 = struct.pack("<L",
                snappy._masked_crc32c(data[snappy._CHUNK_MAX:]))
        self.assertEqual(crc2, b"q\x8foE")
        self.assertEqual(compressor.add_chunk(data, compress=False),
                b"\x01\x04\x00\x01" + crc1 + data[:snappy._CHUNK_MAX] +
                b"\x01\xff\xff\x00" + crc2 + data[snappy._CHUNK_MAX:])
Example #37
def encode_inform(config, data):
    iv = Random.new().read(16)

    key = MASTER_KEY
    if config.getboolean('gateway', 'is_adopted'):
        key = config.get('gateway', 'key')

    payload = None
    flags = 3
    if 'snappy' in sys.modules:
        payload = snappy.compress(data)
        flags = 5
    else:
        payload = zlib.compress(data)
    pad_len = AES.block_size - (len(payload) % AES.block_size)
    payload += chr(pad_len) * pad_len
    payload = AES.new(a2b_hex(key), AES.MODE_CBC, iv).encrypt(payload)
    mac = config.get('gateway', 'lan_mac')

    encoded_data = 'TNBU'  # magic
    encoded_data += pack('>I', 1)  # packet version
    encoded_data += pack('BBBBBB', *(mac_string_2_array(mac)))
    encoded_data += pack('>H', flags)  # flags
    encoded_data += iv  # encryption iv
    encoded_data += pack('>I', 1)  # payload version
    encoded_data += pack('>I', len(payload))  # payload length
    encoded_data += payload

    return encoded_data
Example #38
 def _encode(self, data):
     try:
         import snappy
     except ImportError:
         _print_import_error()
         raise
     return snappy.compress(data)
Example #39
    def save(self, data):
        compressed_data = snappy.compress(data)
        encrypted_data = self.encrypter.encrypt_data(compressed_data)
        questionnaire_state = QuestionnaireState(
            self._user_id, encrypted_data, QuestionnaireStore.LATEST_VERSION)

        current_app.eq["storage"].put(questionnaire_state)
Example #40
def compress(compression_scheme, compression_level, data, compressor_context):

    if compression_scheme == 0:  # zlib
        return zlib.compress(data, compression_level)

    elif compression_scheme == 1:  # zstd
        return compressor_context.compress(data)

    elif compression_scheme == 2:  # lz4
        return lz4.frame.compress(data,
                                  compression_level=compression_level,
                                  store_size=False)

    elif compression_scheme == 3:  # snappy
        return snappy.compress(data)

    elif compression_scheme == 4:  # bzip
        return bz2.compress(data, compresslevel=compression_level)

    elif compression_scheme == 5:  # lzma
        return lzma.compress(data, preset=compression_level)

    elif compression_scheme == 6:  # blosc_zlib
        return blosc.compress(data,
                              clevel=compression_level,
                              cname='zlib',
                              shuffle=blosc.BITSHUFFLE)

    elif compression_scheme == 7:  # blosc_zstd
        return blosc.compress(data,
                              clevel=compression_level,
                              cname='zstd',
                              shuffle=blosc.BITSHUFFLE)

    elif compression_scheme == 8:  # blosc_lz4
        return blosc.compress(data,
                              clevel=compression_level,
                              cname='lz4',
                              shuffle=blosc.BITSHUFFLE)

    elif compression_scheme == 9:  # blosc_snappy
        return blosc.compress(data,
                              clevel=compression_level,
                              cname='snappy',
                              shuffle=blosc.BITSHUFFLE)

    elif compression_scheme == 10:  # blosclz
        return blosc.compress(data,
                              clevel=compression_level,
                              cname='blosclz',
                              shuffle=blosc.BITSHUFFLE)

    elif compression_scheme == 11:  # blosc_lz4hc
        return blosc.compress(data,
                              clevel=compression_level,
                              cname='lz4hc',
                              shuffle=blosc.BITSHUFFLE)
    else:
        raise NotImplementedError('compression scheme not implemented')
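A brief usage sketch of the dispatcher above (scheme numbers taken from its comments; the compressor_context argument is only consulted for zstd, so None is passed for the other schemes):

payload = b"example payload" * 100

zlib_bytes = compress(0, 6, payload, None)    # zlib, level 6
snappy_bytes = compress(3, 0, payload, None)  # snappy; the level is ignored
blosc_bytes = compress(7, 5, payload, None)   # blosc with the zstd codec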
Example #41
	def rewrite(data_string):
		data=json.loads(data_string)
		toupdate=json.loads(update)
		#primary_key_modified=False

		#delete the appropriate document
		query=BooleanQuery()
		for key in primary_keys_map:
			temp=QueryParser(Version.LUCENE_CURRENT,key,analyzer).parse(data[key])
			query.add(BooleanClause(temp,BooleanClause.Occur.MUST))
		

		#modify the values
		for key,value in toupdate.items():
			#if such a key is not present then we either add and update that key into data, or just ignore it! (By default it is set to True!)
			if add_field_if_not_exists==False:
				if key in data.keys():
					data[key]=value
			else:		
				data[key]=value

		#this deletion statement has been intentionally added here
		#the update only continues if the modified data's primary keys do not already exist
		primary_key_update=False
		for key in toupdate.keys():
			if key in primary_keys_map:
				primary_key_update=True
				break
		if primary_key_update == True:
			query_search=BooleanQuery()
			for key in primary_keys_map:
				temp=QueryParser(Version.LUCENE_CURRENT,key,analyzer).parse(data[key])
				query_search.add(BooleanClause(temp,BooleanClause.Occur.MUST))
			hits=searcher.search(query_search,MAX_RESULTS).scoreDocs
			if len(hits) > 0:
				return 106			
		writer.deleteDocuments(query)

		#add the newly modified document
		doc=Document()
		#index files wrt primary key
		for primary_key in primary_keys_map:
			try:
				field=Field(primary_key,data[primary_key],Field.Store.NO,Field.Index.ANALYZED)
				doc.add(field)
			except:
				# primary_keys_map.pop(collection_name)
				return 101
		#compress data using snappy if compression is on		
		if to_be_compressed_input==True:
			temp=json.dumps(data)
			data_string=base64.b64encode(snappy.compress(temp))
		else:
			temp=json.dumps(data)
			data_string=base64.b64encode(temp)

		field=Field("$DATA$",data_string,Field.Store.YES,Field.Index.ANALYZED)
		doc.add(field)
		writer.addDocument(doc)
Example #42
File: codec.py Project: ciena/afkak
def snappy_encode(payload, xerial_compatible=False,
                  xerial_blocksize=32 * 1024):
    """
    Compress the given data with the Snappy algorithm.

    :param bytes payload: Data to compress.
    :param bool xerial_compatible:
        If set then the stream is broken into length-prefixed blocks in
        a fashion compatible with the xerial snappy library.

        The format winds up being::

            +-------------+------------+--------------+------------+--------------+
            |   Header    | Block1_len | Block1 data  | BlockN len | BlockN data  |
            |-------------+------------+--------------+------------+--------------|
            |  16 bytes   |  BE int32  | snappy bytes |  BE int32  | snappy bytes |
            +-------------+------------+--------------+------------+--------------+

    :param int xerial_blocksize:
        Number of bytes per chunk to independently Snappy encode. 32k is the
        default in the xerial library.

    :returns: Compressed bytes.
    :rtype: :class:`bytes`
    """
    if not has_snappy():  # FIXME This should be static, not checked every call.
        raise NotImplementedError("Snappy codec is not available")

    if xerial_compatible:
        def _chunker():
            for i in range(0, len(payload), xerial_blocksize):
                yield payload[i:i+xerial_blocksize]

        out = BytesIO()
        out.write(_XERIAL_HEADER)

        for chunk in _chunker():
            block = snappy.compress(chunk)
            out.write(struct.pack('!i', len(block)))
            out.write(block)

        out.seek(0)
        return out.read()

    else:
        return snappy.compress(payload)
Example #43
    def compress(data):
        meta = {
            "compression": "snappy",
            "orig_size": len(data)
        }

        compressed_data = snappy.compress(data)
        return meta, compressed_data
Example #44
  def test_view_snappy_compressed(self):
    if not snappy_installed():
      raise SkipTest
    import snappy

    cluster = pseudo_hdfs4.shared_cluster()
    finish = []
    try:
      c = make_logged_in_client()
      prefix = self.cluster.fs_prefix + '/test_view_snappy_compressed'
      self.cluster.fs.mkdir(prefix)

      f = cluster.fs.open(prefix + '/test-view.snappy', "w")
      f.write(snappy.compress('This is a test of the emergency broadcasting system.'))
      f.close()

      f = cluster.fs.open(prefix + '/test-view.stillsnappy', "w")
      f.write(snappy.compress('The broadcasters of your area in voluntary cooperation with the FCC and other authorities.'))
      f.close()

      f = cluster.fs.open(prefix + '/test-view.notsnappy', "w")
      f.write('foobar')
      f.close()

      # Snappy compressed fail
      response = c.get('/filebrowser/view=%s/test-view.notsnappy?compression=snappy' % prefix)
      assert_true('Failed to decompress' in response.context['message'], response)

      # Snappy compressed succeed
      response = c.get('/filebrowser/view=%s/test-view.snappy' % prefix)
      assert_equal('snappy', response.context['view']['compression'])
      assert_equal(response.context['view']['contents'], 'This is a test of the emergency broadcasting system.', response)

      # Snappy compressed succeed
      response = c.get('/filebrowser/view=%s/test-view.stillsnappy' % prefix)
      assert_equal('snappy', response.context['view']['compression'])
      assert_equal(response.context['view']['contents'], 'The broadcasters of your area in voluntary cooperation with the FCC and other authorities.', response)

      # Largest snappy compressed file
      finish.append( MAX_SNAPPY_DECOMPRESSION_SIZE.set_for_testing(1) )
      response = c.get('/filebrowser/view=%s/test-view.stillsnappy?compression=snappy' % prefix)
      assert_true('File size is greater than allowed max snappy decompression size of 1' in response.context['message'], response)

    finally:
      for done in finish:
        done()
Example #45
def pack(data):
    if SNAPPY_ENABLED:
        data = snappy.compress(
            pyarrow.serialize(data).to_buffer().to_pybytes())
        # TODO(ekl) we shouldn't need to base64 encode this data, but this
        # seems to not survive a transfer through the object store if we don't.
        return base64.b64encode(data)
    else:
        return data
Example #46
def snappy_write_block(stream, block_bytes):
    """Write a block of bytes wih the 'snappy' codec."""
    data = snappy.compress(block_bytes)
    # Add 4 bytes for the CRC32
    write_long(stream, len(data) + 4)
    stream.write(data)
    # Write the 4-byte, big-endian CRC32 checksum
    crc = crc32(block_bytes) & 0xFFFFFFFF
    stream.write(pack('>I', crc))
Example #47
 def transform(self, buf):
     for trans_id in self.__write_transforms:
         if trans_id == self.ZLIB_TRANSFORM:
             buf = zlib.compress(buf)
         elif trans_id == self.SNAPPY_TRANSFORM:
             buf = snappy.compress(buf)
         else:
             raise TTransportException(TTransportException.INVALID_TRANSFORM, "Unknown transform during send")
     return buf
Example #48
def _get(key, callback, args):
    r = cache_table.find_one({'_id': key})
    if not r:
        content = callback(*args)
        data = bson.binary.Binary(snappy.compress(content))
        cache_table.insert_one({'_id': key, 'data': data})
    else:
        data = r['data']
    content = snappy.decompress(data)
    return content
Example #49
 def enqueue(self, obj):
     data = pickle.dumps(obj)
     if getattr(self, "_debug_pickle", False):
         import objgraph
         restored = pickle.loads(data)
         objgraph.show_refs(restored, too_many=40)
     data = snappy.compress(data)
     self.debug("Broadcasting %d bytes" % len(data))
     zmq_connection = getattr(self, "zmq_connection")
     if zmq_connection is not None:
         zmq_connection.send(data)
Example #50
    def __compress(self, event):

        original_event=event
        event['data']=snappy.compress(event['data'])
        event['header']['snappy']=True
        self.logging.debug("Incoming data compressed.")
        try:
            self.queuepool.outbox.put(event)
        except QueueLocked:
            self.queuepool.inbox.rescue(original_event)
            self.queuepool.outbox.waitUntilPutAllowed()
Example #51
 def fset(self, inst, value):
     
     nprow = getattr(inst, 'NumpyArrayTable__'+self.name)
     #~ print 'fset',self.name,  nprow, value
     
     if nprow is None:
         nprow = self.NumpyArrayTableClass()
         setattr(inst, 'NumpyArrayTable__'+self.name, nprow)
     
     if value is None:
         if hasattr(inst, self.name+'_array') :
             delattr(inst, self.name+'_array')
         nprow.shape = None
         nprow.dtype = None
         nprow.blob = None
         nprow.units = None
         nprow.compress = None
         return 
     
     if self.arraytype == np.ndarray:
         assert (type(value) == np.ndarray) or (type(value) == np.memmap) , 'Value is not np.array or np.memmap but {}'.format(type(value))
     if self.arraytype == pq.Quantity:
         assert type(value) == pq.Quantity , '{} {} {} value is not pq.Quantity'.format(inst.__class__.__name__, self.name, value)
     
     shape = ('{},'*value.ndim)[:-1].format(*value.shape)
     if shape.endswith(',') : shape = shape[:-1]
     nprow.shape = shape
     
     nprow.dtype = value.dtype.str
     
     if self.compress == 'blosc':
         blob = blosc.compress(value.tostring(), typesize = value.dtype.itemsize, clevel= 9)
     else:
         if not value.flags['C_CONTIGUOUS']:
             #~ buf = np.getbuffer(np.array(value, copy = True))
             buf = np.array(value, copy=True).data
         else:     
             #~ buf = np.getbuffer(value)
             buf = value.data
         if self.compress == 'zlib':
             blob = zlib.compress(buf)
         elif self.compress == 'lz4':
             blob = lz4.compress(buf)
         elif self.compress == 'snappy':
             blob = snappy.compress(buf)
         else :
             blob = buf
     nprow.compress = self.compress
     nprow.blob = blob
     
     if self.arraytype == pq.Quantity:
         nprow.units = value.dimensionality.string
     
     setattr(inst, self.name+'_array', value)
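A hedged sketch of the matching read direction (an fget counterpart; the helper name is assumed): rebuild the array from the stored shape string, dtype string and blob. Only the zlib, snappy and uncompressed branches are shown.

import zlib

import numpy as np
import snappy


def _array_from_nprow(nprow):
    if nprow.blob is None:
        return None
    if nprow.compress == 'zlib':
        buf = zlib.decompress(nprow.blob)
    elif nprow.compress == 'snappy':
        buf = snappy.uncompress(nprow.blob)
    else:
        buf = nprow.blob
    shape = tuple(int(s) for s in nprow.shape.split(',') if s)
    return np.frombuffer(buf, dtype=np.dtype(nprow.dtype)).reshape(shape)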
Example #52
    def test_decompression(self):
        # test that we check for the initial stream identifier
        data = b"\x01" * 50
        self.assertRaises(snappy.UncompressError,
                snappy.StreamDecompressor().decompress,
                    b"\x01\x36\x00\00" +
                    struct.pack("<L", snappy._masked_crc32c(data)) + data)
        self.assertEqual(
                snappy.StreamDecompressor().decompress(
                    b"\xff\x06\x00\x00sNaPpY"
                    b"\x01\x36\x00\x00" +
                    struct.pack("<L", snappy._masked_crc32c(data)) + data),
                data)
        decompressor = snappy.StreamDecompressor()
        decompressor.decompress(b"\xff\x06\x00\x00sNaPpY")
        self.assertEqual(
                decompressor.copy().decompress(
                    b"\x01\x36\x00\x00" +
                    struct.pack("<L", snappy._masked_crc32c(data)) + data),
                data)

        # test that we throw errors for unknown unskippable chunks
        self.assertRaises(snappy.UncompressError,
                decompressor.copy().decompress, b"\x03\x01\x00\x00")

        # test that we skip unknown skippable chunks
        self.assertEqual(b"",
                         decompressor.copy().decompress(b"\xfe\x01\x00\x00"))

        # test that we check CRCs
        compressed_data = snappy.compress(data)
        real_crc = struct.pack("<L", snappy._masked_crc32c(data))
        fake_crc = os.urandom(4)
        self.assertRaises(snappy.UncompressError,
                decompressor.copy().decompress,
                    b"\x00\x0a\x00\x00" + fake_crc + compressed_data)
        self.assertEqual(
                decompressor.copy().decompress(
                    b"\x00\x0a\x00\x00" + real_crc + compressed_data),
                data)

        # test that we buffer when we don't have enough
        uncompressed_data = os.urandom(100)
        compressor = snappy.StreamCompressor()
        compressed_data = (compressor.compress(uncompressed_data[:50]) +
                           compressor.compress(uncompressed_data[50:]))
        for split1 in range(len(compressed_data) - 1):
            for split2 in range(split1, len(compressed_data)):
                decompressor = snappy.StreamDecompressor()
                self.assertEqual(
                    (decompressor.decompress(compressed_data[:split1]) +
                     decompressor.decompress(compressed_data[split1:split2]) +
                     decompressor.decompress(compressed_data[split2:])),
                    uncompressed_data)
Example #53
def Compress(Input):
	Output = Input + '.snappy'
	file_in = file(Input, "rb")
	data = file_in.read()

	file_out = file(Output, "wb")
	c_data = snappy.compress(data)
	file_out.write(c_data)
	file_out.close()

	file_in.close()
Example #54
 def transform(self, buf):
     for trans_id in self.__write_transforms:
         if trans_id == TRANSFORM.ZLIB:
             buf = zlib.compress(buf)
         elif trans_id == TRANSFORM.SNAPPY:
             buf = snappy.compress(buf)
         elif trans_id == TRANSFORM.ZSTD:
             buf = zstd.ZstdCompressor(write_content_size=True).compress(buf)
         else:
             raise TTransportException(TTransportException.INVALID_TRANSFORM,
                                       "Unknown transform during send")
     return buf
Example #55
def _get(url, callback, *args):
    key = get_sha1_key(url)
    r = cache_table.find_one({'_id': key})
    if not r:
        throttle.run()
        r = requests.get(url)
        content = callback(r, *args)
        data = bson.binary.Binary(snappy.compress(content))
        cache_table.insert_one({'_id': key, 'data': data})
    else:
        data = r['data']
    content = snappy.decompress(data)
    return content
Example #56
  def _WriteBlock(self):
    if not self._header_written:
      self._WriteHeader()

    if self.block_count <= 0:
      logger.info('Current block is empty, nothing to write.')
      return

    # write number of items in block
    self.encoder.write_long(self.block_count)

    # write block contents
    uncompressed_data = self._buffer_writer.getvalue()
    codec = self.GetMeta(CODEC_KEY).decode('utf-8')
    if codec == 'null':
      compressed_data = uncompressed_data
      compressed_data_length = len(compressed_data)
    elif codec == 'deflate':
      # The first two characters and last character are zlib
      # wrappers around deflate data.
      compressed_data = zlib.compress(uncompressed_data)[2:-1]
      compressed_data_length = len(compressed_data)
    elif codec == 'snappy':
      compressed_data = snappy.compress(uncompressed_data)
      compressed_data_length = len(compressed_data) + 4 # crc32
    else:
      fail_msg = '"%s" codec is not supported.' % codec
      raise DataFileException(fail_msg)

    # Write length of block
    self.encoder.write_long(compressed_data_length)

    # Write block
    self.writer.write(compressed_data)

    # Write CRC32 checksum for Snappy
    if self.GetMeta(CODEC_KEY) == 'snappy':
      self.encoder.write_crc32(uncompressed_data)

    # write sync marker
    self.writer.write(self.sync_marker)

    logger.debug(
        'Writing block with count=%d nbytes=%d sync=%r',
        self.block_count, compressed_data_length, self.sync_marker)

    # reset buffer
    self._buffer_writer.seek(0)
    self._buffer_writer.truncate()
    self._block_count = 0
Example #57
File: client.py Project: vindeka/gate
	def process_raw(self, evidence_uuid, pipeline, data, raw, autosave=True, wait_for_result=False):
		"""Process the raw data using the given pipeline.
		@param evidence_uuid: Evidence id
		@param pipeline: Name of pipeline to use
		@param data: Current information about the stream
		@param raw: Raw data to process
		@param autosave: Auto passes result onto the engine
		@param wait_for_result: Wait to get the resulting data
		@returns: result data
		"""
		raw = base64.b64encode(snappy.compress(raw))
		if wait_for_result:
			return self._client.call({}, 'process_raw', evidence_uuid=evidence_uuid, pipeline=pipeline,
				data=data, raw=raw, return_result=wait_for_result, autosave=autosave)
		self._client.cast({}, 'process_raw', evidence_uuid=evidence_uuid, pipeline=pipeline,
			data=data, raw=raw, return_result=wait_for_result, autosave=autosave)
Example #58
    def FlushChunk(self, chunk):
        bevy_offset = self.bevy_length

        if self.compression == lexicon.AFF4_IMAGE_COMPRESSION_ZLIB:
            compressed_chunk = zlib.compress(chunk)
        elif snappy and self.compression == lexicon.AFF4_IMAGE_COMPRESSION_SNAPPY:
            compressed_chunk = snappy.compress(chunk)
        elif self.compression == lexicon.AFF4_IMAGE_COMPRESSION_STORED:
            compressed_chunk = chunk

        self.bevy_index.append(bevy_offset)
        self.bevy.append(compressed_chunk)
        self.bevy_length += len(compressed_chunk)
        self.chunk_count_in_bevy += 1

        if self.chunk_count_in_bevy >= self.chunks_per_segment:
            self._FlushBevy()
Example #59
File: record.py Project: amiller/quartet
def worker(q):
    while 1:
        fn_data = q.get()
        st = time.time()
        if fn_data is None:
            break
        orig = comp = 0
        fn, data = fn_data
        if fn.endswith('.jpg') or fn.endswith('.ppm'):
            cv2.imwrite(fn, data)
        elif fn.endswith('.npy'):
            np.save(fn, data)
        elif fn.endswith('.snappy'):
            data_snappy = snappy.compress(data)
            open(fn, 'w').write(data_snappy)
            orig = len(data)
            comp = len(data_snappy)
        print('Size[%d] Fn[%s] Time[%f] Orig[%d] Comp[%d]' % (q.qsize(), os.path.basename(fn), time.time() - st, orig, comp))