Example #1
    def take_action(self, parsed_args):
        self.log = logging.getLogger(self.__class__.__name__)
        self.log.debug('Initialized %s', self.__class__.__name__)
        self.log.debug('Args: %s', str(parsed_args))

        self.log.debug("Establishing connection")

        c, db, collection = OutputDBInterface.get_db_connection(hostname=parsed_args.hostname)

        cursor = collection.find()
        N = cursor.count()
        if N == 0:
            self.log.error("No events in the output database; no file made.")
            return

        f = gzip.open(parsed_args.filename, 'wb')

        pickle.dump(__version__, f)

        self.log.info("Processing %d trigger events" % N)

        for i in tqdm(range(N)):
            doc = next(cursor)
            doc2 = snappy.uncompress(doc['compressed_doc'])
            doc2 = pickle.loads(doc2)

            pickle.dump(doc2, f)


        f.close()
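
Example #1 streams snappy-decompressed trigger documents from MongoDB into a gzip file as a sequence of pickles (the version string first, then one document per event). A minimal sketch of how such a file could be read back, assuming consumers simply unpickle until EOF:

import gzip
import pickle

def read_trigger_events(filename):
    """Read back a file written by take_action above: version, then event docs."""
    docs = []
    with gzip.open(filename, 'rb') as f:
        version = pickle.load(f)   # the __version__ string written first
        while True:
            try:
                docs.append(pickle.load(f))
            except EOFError:
                break
    return version, docs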
Example #2
def decompress(data):
    """
    Decompresses the given data via the snappy algorithm.

    If ``python-snappy`` is not installed a ``RuntimeError`` is raised.
    """
    if not snappy_available:
        raise RuntimeError("Snappy compression unavailable.")

    buff_offset = len(raw_header)  # skip the header
    length = len(data) - len(raw_header)

    output = BytesIO()

    while buff_offset <= length:
        block_size = struct.unpack_from("!i", data, buff_offset)[0]
        buff_offset += struct.calcsize("!i")

        block = struct.unpack_from("!%ds" % block_size, data, buff_offset)[0]
        buff_offset += block_size

        output.write(snappy.uncompress(block))

    result = output.getvalue()

    output.close()

    return result
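
Example #2 consumes a header followed by "!i"-length-prefixed snappy blocks. Below is a sketch of the matching compress side under the same framing; the actual value of raw_header and the 32 KiB chunk size are assumptions, not taken from the original module:

import struct
import snappy

raw_header = b"snappy\x00"   # placeholder only; the real module defines its own header bytes
CHUNK_SIZE = 32 * 1024       # assumed chunk size

def compress(data):
    """Produce the framed format that decompress() above consumes."""
    out = bytearray(raw_header)
    for start in range(0, len(data), CHUNK_SIZE):
        block = snappy.compress(data[start:start + CHUNK_SIZE])
        out += struct.pack("!i", len(block))
        out += block
    return bytes(out)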
Example #3
    def __init__(self,  filename):
        self.fileName = filename
        self.api = "API_UNKNOWN"
        self.traceFile = open(self.fileName, 'rb+')
        self.filePointer = 0
        self.fileSize = os.path.getsize(self.fileName)
        self.nextCallNumber = 0
        self.lastFrameBreakPos = 0

        self.container = 0
        self.containerPointer = 0

        self.fullFilePosition = 0

        self.mem = self.traceFile.read(2)
        self.filePointer += 2

        if self.mem[0:2] != b'at':
            raise Exception("not snappy file!")

        length = int(struct.unpack('I', self.traceFile.read(4))[0])
        self.filePointer += 4

        compressedMem = self.traceFile.read(length)
        self.filePointer += length

        self.mem = snappy.uncompress(compressedMem)
        self.getVersion(self.mem)
        return
Example #4
    def content(self):
        """
        Fetches the content from the *_content* attribute or from the
        *_blobs* directory and returns it decompressed.
        """

        # If the content has not been saved yet, it is still
        # held in *self._content*.
        if self._content is not None:
            return self._content

        # Check whether a blob exists for the content
        if not self.content_blob_name:
            return None

        # Load the blob, decompress it, and return it
        blob_dir = os.path.join(config.DATABLOBSDIR.value, self.content_blob_name[0])
        blob_path = os.path.join(blob_dir, self.content_blob_name)
        with io.open(blob_path, "rb") as blob_file:
            if self.content_blob_name.endswith(".snappy"):
                content = snappy.uncompress(blob_file.read())
            else:
                content = blob_file.read()
            if content and self.node.content_type in constants.CONTENT_TYPES_TEXT:
                return content.decode("utf-8")
            else:
                return content
Example #5
def print_journal(fn, at=0):

    # file structure
    header_struct = xstruct('< 5s 20s 1s 128s 2s Q')
    section_struct = xstruct('< I Q Q')
    footer_struct = xstruct('< I QQ Q 4s')
    align = 8192

    # open file
    f = open(fn, 'rb')
    sz = os.fstat(f.fileno()).st_size # 2.4 won't accept 0
    buf = mmap.mmap(f.fileno(), sz, prot=mmap.PROT_READ)
    
    # file header
    magic, date, _, path, _, fileid = unpack_from(header_struct, buf)
    path = path[:path.find('\0')]
    date = date[:date.find('\0')]
    print '%08x: header magic=%s date=%s path=%s fid=%x' % (0, hex(magic), date, path, fileid)
    if at==0:
        at = 8192
    
    # traverse file
    while at < len(buf):
    
        # section header
        l, lsn, fid = unpack_from(section_struct, buf, at)
        lp = (l + align-1) & ~(align-1)
        section_at = at + 20
        footer_at = at + l - 32
        ok = 'OK'
        if fid!=fileid: ok = 'BAD'
        print '%08x: section l=%x(%d) lp=%x(%d) lsn=%x(%d) fid=%x(%s)' % \
            (at, l, l, lp, lp, lsn, lsn, fid, ok)
    
        # compute hash, compare with footer
        sentinel, hash_a, hash_b, reserved, magic = unpack_from(footer_struct, buf, footer_at)
        computed_hash_a, computed_hash_b = hash(buf[at:footer_at])
        hash_ok = 'OK'
        if not (hash_a==computed_hash_a and hash_b==computed_hash_b): hash_ok = 'BAD'
        print '%08x: hash=%08x:%08x(%s)' % (at, computed_hash_a, computed_hash_b, hash_ok)
    
        # section
        try:
            if snappy:
                section = snappy.uncompress(buf[section_at:footer_at])
                print '%08x: uncompressed length=%x(%d)' % (section_at, len(section), len(section))
                if do_journal_entries:
                    print_journal_entries(section)
        except Exception, e:
            print '%08x: %s' % (section_at, e)
    
        # section footer
        print '%08x: footer sentinel=%x hash=%08x:%08x(%s) magic=%s' % \
            (footer_at, sentinel, hash_a, hash_b, hash_ok, hex(magic))
    
        # next section
        at += lp
Example #6
File: jrpc_py.py Project: zorro430/JAQS
def _unpack_msgpack_snappy(str):
    if str.startswith(b'S'):
        tmp = snappy.uncompress(str[1:])
        # print "SNAPPY: ", len(str), len(tmp)
        obj = msgpack.loads(tmp, encoding='utf-8')
    elif str.startswith(b'\0'):
        obj = msgpack.loads(str[1:], encoding='utf-8')
    else:
        return None
    
    return obj
Example #7
File: jrpc_py.py Project: raycool/vnpy
def _unpack_msgpack_snappy(str):
    if str.startswith(b'S'):
        tmp = snappy.uncompress(str[1:])
        # print "SNAPPY: ", len(str), len(tmp)
        obj = msgpack.loads(tmp, encoding='utf-8')
    elif str.startswith(b'\0'):
        obj = msgpack.loads(str[1:], encoding='utf-8')
    else:
        return None
    
    return obj
Example #8
def _unpack(str) :

    if str[0] == 'S':
        tmp = snappy.uncompress(str[1:])
        obj = msgpack.loads(tmp)
    elif str[0] == '\0':
        obj = msgpack.loads(str[1:])
    else:
        return None

    #print "UNPACK", obj
    return obj
Example #9
def _unpack(str) :

    if str[0] == 'S':
        tmp = snappy.uncompress(str[1:])
        obj = msgpack.loads(tmp)
    elif str[0] == '\0':
        obj = msgpack.loads(str[1:])
    else:
        return None

    #print "UNPACK", obj
    return obj
Example #10
def _unpack(str) :

    if str[0] == 'S':
        tmp = snappy.uncompress(str[1:])
        #print "SNAPPY: ", len(str), len(tmp)
        obj = msgpack.loads(tmp)
    elif str[0] == '\0':
        obj = msgpack.loads(str[1:])
    else:
        return None

    return obj
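
Examples #6 through #10 all decode a one-byte prefix: b'S' marks a snappy-compressed msgpack payload, b'\0' a plain one. A hedged sketch of the pack side such a protocol implies; the compression threshold is an illustrative assumption:

import msgpack
import snappy

def _pack_msgpack_snappy(obj, compress_threshold=1024):
    """Encode obj with msgpack, snappy-compressing payloads above the threshold."""
    payload = msgpack.dumps(obj, use_bin_type=True)
    if len(payload) > compress_threshold:
        return b'S' + snappy.compress(payload)
    return b'\0' + payload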
Example #11
 def getByte(self):
     if self.containerPointer == len(self.mem):
         length = int(struct.unpack('I', self.traceFile.read(4))[0])
         self.filePointer += 4
         compressedMem = self.traceFile.read(length)
         self.filePointer += length
         self.container += 1
         self.mem = snappy.uncompress(compressedMem)
         self.containerPointer = 0
     rval= self.mem[self.containerPointer]
     self.containerPointer += 1
     self.fullFilePosition += 1
     return rval
Example #12
File: demo.py Project: grschafer/skadi
def read(stream, peek):
  if peek.cls not in PBMSG_BY_KIND.values():
    msg = 'please update demo.proto: {0}'.format(peek.cls)
    raise InvalidProtobufMessage(msg)

  stream.seek(peek.offset)

  data = stream.read(peek.size)
  if peek.compressed:
    data = snappy.uncompress(data)

  message = peek.cls()
  message.ParseFromString(data)

  return message
Example #13
 def _decompress_subblock(self):
     if self._subblock_size is None:
         if len(self._buf) <= 4:
             return b""
         self._subblock_size = struct.unpack(">i", self._buf[:4])[0]
         self._buf = self._buf[4:]
     # Only attempt to decompress complete subblocks.
     if len(self._buf) < self._subblock_size:
         return b""
     compressed = self._buf[:self._subblock_size]
     self._buf = self._buf[self._subblock_size:]
     uncompressed = snappy.uncompress(compressed)
     self._block_read += len(uncompressed)
     self._subblock_size = None
     return uncompressed
Example #14
File: demo.py Project: twocngdagz/skadi
def read(stream, peek):
    if peek.cls not in PBMSG_BY_KIND.values():
        msg = 'please update demo.proto: {0}'.format(peek.cls)
        raise InvalidProtobufMessage(msg)

    stream.seek(peek.offset)

    data = stream.read(peek.size)
    if peek.compressed:
        data = snappy.uncompress(data)

    message = peek.cls()
    message.ParseFromString(data)

    return message
Example #15
def decompress(data, compressor_id):
    if compressor_id == SnappyContext.compressor_id:
        # python-snappy doesn't support the buffer interface.
        # https://github.com/andrix/python-snappy/issues/65
        # This only matters when data is a memoryview since
        # id(bytes(data)) == id(data) when data is a bytes.
        # NOTE: bytes(memoryview) returns the memoryview repr
        # in Python 2.7. The right thing to do in 2.7 is call
        # memoryview.tobytes(), but we currently only use
        # memoryview in Python 3.x.
        return snappy.uncompress(bytes(data))
    elif compressor_id == ZlibContext.compressor_id:
        return zlib.decompress(data)
    else:
        raise ValueError("Unknown compressorId %d" % (compressor_id, ))
Example #16
def decompress(data, compressor_id):
    if compressor_id == SnappyContext.compressor_id:
        # python-snappy doesn't support the buffer interface.
        # https://github.com/andrix/python-snappy/issues/65
        # This only matters when data is a memoryview since
        # id(bytes(data)) == id(data) when data is a bytes.
        return snappy.uncompress(bytes(data))
    elif compressor_id == ZlibContext.compressor_id:
        return zlib.decompress(data)
    elif compressor_id == ZstdContext.compressor_id:
        # ZstdDecompressor is not thread safe.
        # TODO: Use a pool?
        return ZstdDecompressor().decompress(data)
    else:
        raise ValueError("Unknown compressorId %d" % (compressor_id, ))
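
A minimal usage sketch for the dispatcher above. The integer ids here follow MongoDB's OP_COMPRESSED convention (1 = snappy, 2 = zlib); the stub classes only stand in for the driver's real SnappyContext and ZlibContext:

import zlib
import snappy

class SnappyContext:          # stand-in for the driver's real class
    compressor_id = 1

class ZlibContext:            # stand-in for the driver's real class
    compressor_id = 2

payload = b'{"hello": "world"}' * 100
assert decompress(snappy.compress(payload), SnappyContext.compressor_id) == payload
assert decompress(zlib.compress(payload), ZlibContext.compressor_id) == payload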
Example #17
 def _decompress_subblock(self):
     if self._subblock_size is None:
         if len(self._buf) <= 4:
             return b""
         self._subblock_size = struct.unpack(">i", self._buf[:4])[0]
         self._buf = self._buf[4:]
     # Only attempt to decompress complete subblocks.
     if len(self._buf) < self._subblock_size:
         return b""
     compressed = self._buf[:self._subblock_size]
     self._buf = self._buf[self._subblock_size:]
     uncompressed = snappy.uncompress(compressed)
     self._block_read += len(uncompressed)
     self._subblock_size = None
     return uncompressed
Example #18
def decompress(data, compressor_id):
    if compressor_id == SnappyContext.compressor_id:
        # python-snappy doesn't support the buffer interface.
        # https://github.com/andrix/python-snappy/issues/65
        # This only matters when data is a memoryview since
        # id(bytes(data)) == id(data) when data is a bytes.
        # NOTE: bytes(memoryview) returns the memoryview repr
        # in Python 2.7. The right thing to do in 2.7 is call
        # memoryview.tobytes(), but we currently only use
        # memoryview in Python 3.x.
        return snappy.uncompress(bytes(data))
    elif compressor_id == ZlibContext.compressor_id:
        return zlib.decompress(data)
    else:
        raise ValueError("Unknown compressorId %d" % (compressor_id,))
Example #19
    def find_by_bond_topology_id(self, btid):
        """Finds all the conformer associated with a bond topology id.

    Args:
      btid: bond topology id to look up.

    Returns:
      iterable of dataset_pb2.Conformer
    """
        cur = self._conn.cursor()
        select = (f'SELECT cid, conformer '
                  f'FROM {_CONFORMER_TABLE_NAME} '
                  f'INNER JOIN {_BTID_TABLE_NAME} USING(cid) '
                  f'WHERE {_BTID_TABLE_NAME}.btid = ?')
        cur.execute(select, (btid, ))
        return (dataset_pb2.Conformer().FromString(snappy.uncompress(
            result[1])) for result in cur)
Example #20
    def post(self):
        """Insert a message
        """
        # print self.request.body
        print "get message"
        userid = self.get_argument("userid")
        pcid = self.get_argument("pcid")
        packettype = self.get_argument("type")
        pos = self.request.body.find("&data=")

        if pos==-1:
            self.write('error')
            self.finish()
            return

        snappydata = self.request.body[pos+6:]

        try:
            protodata=snappy.uncompress(snappydata)
            for package in protodata.split("!!!"):

                if len(package) < 10:
                    continue
                if packettype == "1":
                    packetobj = IpPacket()
                    packetobj.ParseFromString(package)
                    self.saveipdata(packetobj,userid,pcid)
                elif packettype == "2":
                    packetobj = EmailPacket()
                    packetobj.ParseFromString(package)
                    self.saveemaildata(packetobj,userid,pcid)
                elif packettype == "3":
                    packetobj = HttpPacket()
                    packetobj.ParseFromString(package)
                    self.savehttpdata(packetobj,userid,pcid)
        except:
            print "error"
            print "print self.request.body",self.request.body
            print "self.request.arguments",self.request.arguments
            traceback.print_exc()
            self.write('ok')
            self.finish()
            return
            
        self.write('ok')
        self.finish()
Example #21
def imgmsg_to_pil(img_msg, rgba=True):
    try:
        uncompressed_img_msg = sensor_msgs.msg.Image()
        uncompressed_img_msg.header = img_msg.header
        uncompressed_img_msg.height = img_msg.height
        uncompressed_img_msg.width = img_msg.width
        uncompressed_img_msg.step = 1
        uncompressed_img_msg.encoding = 'mono8'
        uncompressed_img_msg.data = snappy.uncompress(np.fromstring(img_msg.data, dtype = 'uint8'))
        if img_msg._type == 'sensor_msgs/CompressedImage':
            pil_img = Image.open(StringIO(img_msg.data))
            if pil_img.mode != 'L':
                pil_img = pil_bgr2rgb(pil_img)
        else:
            alpha = False
            if uncompressed_img_msg.encoding == 'mono8':
                mode = 'L'
            elif uncompressed_img_msg.encoding == 'rgb8':
                mode = 'BGR'
            elif uncompressed_img_msg.encoding == 'bgr8':
                mode = 'RGB'
            elif uncompressed_img_msg.encoding in ['bayer_rggb8', 'bayer_bggr8', 'bayer_gbrg8', 'bayer_grbg8']:
                mode = 'L'
            elif uncompressed_img_msg.encoding == 'mono16':
                if uncompressed_img_msg.is_bigendian:
                    mode = 'F;16B'
                else:
                    mode = 'F:16'
            elif uncompressed_img_msg.encoding == 'rgba8':
                mode = 'BGR'
                alpha = True
            elif uncompressed_img_msg.encoding == 'bgra8':
                mode = 'RGB'
                alpha = True
    
            pil_img = Image.frombuffer('RGB', (uncompressed_img_msg.width, uncompressed_img_msg.height), uncompressed_img_msg.data, 'raw', mode, 0, 1)

        if rgba and pil_img.mode != 'RGBA':
            pil_img = pil_img.convert('RGBA')
    
        return pil_img

    except Exception, ex:
        print >> sys.stderr, 'Can\'t convert image: %s' % ex
        return None
Example #22
 def format(self, value):
     try:
         if is_gzip(value):
             output = gzip.decompress(value)
         elif is_lzma(value):
             output = lzma.decompress(value)
         elif is_snappy(value):
             if SNAPPY_SUPPORT:
                 output = snappy.uncompress(value)
             else:
                 return self.process_error(
                     'Cannot decompress value: '
                     'Snappy is not available on this system.')
         else:
             output = lz4.block.decompress(value)
         return output
     except OSError as e:
         return self.process_error('Cannot decompress value: {}'.format(e))
Example #23
def depth_from_binary(binary_name, imgsize=(240, 320)):
    """ Decode binary file containing depth images and return the depth
    images as a numpy ndarray.
    :param binary_name: The file name of the binary file to read.
    :param imgsize: The size (height, width) of each uncompressed image.
    :return: numpy array containing 'l' images of size 'imgsize'.
    """
    images = list()
    with open(binary_name, 'rb') as fp:
        b = fp.read(4)
        while b != '':
            k = struct.unpack('<L', b)[0]
            image_bytes = fp.read(k)
            images.append(snappy.uncompress(image_bytes))
            b = fp.read(4)
    l = len(images)
    images = np.array(images)
    images = np.fromstring(images, dtype=np.dtype('>u2'))
    return images.reshape((l, ) + imgsize)
Example #24
	def Retrieve(self, item):
		"""
		Retrieve one piece of data from the table.

		Args:
			item: A integer, which specifies the index number of the data
			to be retrieved.

		Returns:
			Bytes retrieved from the table.

		Raises:
			KeyError    : when the index number is out of range.
			RuntimeError: when failed to retrieve data.
		"""

		with self.lock.gen_rlock():

			if (self.index is None) or (self.head is None):
				raise RuntimeError('The table or the item is inaccessible')

			if self.items <= item: #atomic.load(self.items)
				raise KeyError('The item number is out of bounds')

			if self.itemOffset > item:
				raise KeyError('The item offset number is out of bounds')

			startOffset, endOffset, filenum = self.GetBounds(item - self.itemOffset)

			dataFile = self.files.get(filenum, None)
			if dataFile is None:
				raise RuntimeError('missing data file {filenum}'.format(filenum=filenum))

			dataFile.seek(startOffset)
			blob = dataFile.read(endOffset - startOffset)

		# self.readMeter.Mark(len(blob) + 2 * INDEX_ENTRY_SIZE)

		if self.noCompression:
			return blob

		return snappy.uncompress(blob)
Example #25
def main():
    args = parser.parse_args()

    if args.action not in actions:
        print("Error: Invalid action %s" % args.action)
        sys.exit(1)

    def process_error(msg):
        if args.action == ACTION_VALIDATE:
            return print(json.dumps({"valid": False, "message": msg}))
        else:
            print(msg)
            sys.exit(2)

    try:
        decoded_value = base64.b64decode(args.value)
    except binascii.Error as e:
        return process_error("Cannot decode value: %s" % e)

    try:
        if is_gzip(decoded_value):
            unpacked_value = gzip.decompress(decoded_value)
        elif is_lzma(decoded_value):
            unpacked_value = lzma.decompress(decoded_value)
        elif is_snappy(decoded_value):
            unpacked_value = snappy.uncompress(decoded_value)
        else:
            unpacked_value = lz4.block.decompress(decoded_value)
    except OSError as e:
        return process_error("Cannot decompress value: %s" % e)

    unpacked_value = unpacked_value.decode()

    if args.action == ACTION_VALIDATE:
        return print(json.dumps({"valid": True, "message": ""}))
    else:
        return print(
            json.dumps({
                "output": repr(unpacked_value),
                "read-only": True,
                "format": "plain_text",
            }))
Example #26
    def find_by_expanded_stoichiometry(self, exp_stoich):
        """Finds all of the conformers with a stoichiometry.

    The expanded stoichiometry includes hydrogens as part of the atom type.
    See smu_utils_lib.expanded_stoichiometry_from_topology for a
    description.

    Args:
      exp_stoich: string

    Returns:
      iterable of dataset_pb2.Conformer
    """
        cur = self._conn.cursor()
        select = (f'SELECT conformer '
                  f'FROM {_CONFORMER_TABLE_NAME} '
                  f'WHERE exp_stoich = ?')
        cur.execute(select, (exp_stoich, ))
        return (dataset_pb2.Conformer().FromString(snappy.uncompress(
            result[0])) for result in cur)
Example #27
File: server.py Project: vindeka/gate
    def process_raw(self, ctx, evidence_uuid, pipeline, data, raw, return_result=False, autosave=True):
        pipeline = self._get_pipeline(pipeline)
        if not pipeline:
            return

        # StringIO for raw data
        stream = StringIO.StringIO(snappy.uncompress(base64.b64decode(raw)))

        # Perform the actual processing
        bundle = Bundle(self.server, evidence_uuid, pipeline, data, stream)
        try:
            bundle = pipeline.process(bundle)
        except Exception as e:
            bundle.add_exception(e, traceback=''.join(traceback.format_exc()))

        data = bundle.data
        if autosave:
            data = self._save_result(evidence_uuid, data, wait=return_result)

        if return_result:
            return data
Example #28
def imgmsg_to_pil(img_msg, rgba=True):
    try:
        uncompressed_img_msg = sensor_msgs.msg.Image()
        uncompressed_img_msg.header = img_msg.header
        uncompressed_img_msg.height = img_msg.height
        uncompressed_img_msg.width = img_msg.width
        uncompressed_img_msg.step = 1
        uncompressed_img_msg.encoding = 'mono8'
        uncompressed_img_msg.data = snappy.uncompress(np.fromstring(img_msg.data, dtype = 'uint8'))
        alpha = False
        mode = 'L'
        pil_img = Image.frombuffer('RGB', (uncompressed_img_msg.width, uncompressed_img_msg.height), uncompressed_img_msg.data, 'raw', mode, 0, 1)

        if rgba and pil_img.mode != 'RGBA':
            pil_img = pil_img.convert('RGBA')

        return pil_img

    except Exception, ex:
        print >> sys.stderr, 'Can\'t convert image: %s' % ex
        return None
Example #29
 def _getblock_inet(self):
     result = BytesIO()
     last = 0
     while not last:
         if self.protocol == Protocol.prot9:
             flag = self._getbytes(2)
             unpacked = struct.unpack('<H', flag)[0]  # little endian short
             length = unpacked >> 1
             last = unpacked & 1
         else:
             flag = self._getbytes(8)
             unpacked = struct.unpack('<q',
                                      flag)[0]  # little endian long long
             length = unpacked >> 1
             last = unpacked & 1
         if length > 0:
             block = self._getbytes(length)
             if self.compression == Compression.snappy:
                 block = snappy.uncompress(block)
             result.write(block)
     return result.getvalue()
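
_getblock_inet above reassembles a result from length-prefixed blocks whose low flag bit marks the last block, snappy-decompressing each block when that codec is negotiated. A hedged sketch of the matching send side for the two-byte (prot9) header; the block size and function name are assumptions for illustration:

import struct
import snappy

def make_blocks_prot9(data, compress=False, block_size=8190):
    """Frame data with little-endian short headers: (block_length << 1) | is_last."""
    if not data:
        return struct.pack('<H', 1)  # a single empty block flagged as last
    blocks = []
    for start in range(0, len(data), block_size):
        chunk = data[start:start + block_size]
        if compress:
            chunk = snappy.compress(chunk)
        is_last = 1 if start + block_size >= len(data) else 0
        blocks.append(struct.pack('<H', (len(chunk) << 1) | is_last) + chunk)
    return b''.join(blocks)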
Example #30
def read_idtk_file(filename):
    """
    :param filename: source data filename (DTK serialized data format)
    :return: header, payload, contents, data - parsed JSON header, raw payload data, decompressed (if appropriate) payload data, and parsed JSON data
    """

    header, payload = read_idtk_file_components(filename)
    header = json.loads(header, object_pairs_hook=OrderedDict
                        )  # string isn't very useful, convert JSON to data
    contents = None

    if 'compressed' in header['metadata'] and header['metadata']['compressed']:
        contents = timing(lambda: snappy.uncompress(payload),
                          message_index=DECOMPRESS_PAYLOAD)
    else:
        contents = payload

    data = timing(lambda: json.loads(contents, object_pairs_hook=OrderedDict),
                  message_index=PARSE_JSON)

    return header, payload, contents, data
Example #31
def parse_header(data, offset=0):
    request_id, response_to, op_code = struct.unpack_from("<III", data, offset)
    offset += 12

    if op_code == OP_COMPRESSED:
        op_code, uncompressed_size, compressor_id = struct.unpack_from(
            "<IIB", data, offset)
        offset += 9

        if compressor_id == COMPRESSOR_ZLIB:
            data = zlib.decompress(memoryview(
                data)[offset:], bufsize=uncompressed_size)
            offset = 0
        elif compressor_id == COMPRESSOR_SNAPPY and SNAPPY_SUPPORTED:
            data = snappy.uncompress(memoryview(data)[offset:])
            offset = 0
        elif compressor_id == COMPRESSOR_NOOP:
            pass
        else:
            raise ValueError("Unsupported compressor")

    return request_id, response_to, op_code, data, offset
Example #32
    def find_by_expanded_stoichiometry_list(self, exp_stoichs):
        """Finds all of the molecules with a stoichiometry.

    The expanded stoichiometry includes hydrogens as part of the atom type.
    See smu_utils_lib.expanded_stoichiometry_from_topology for a
    description.

    Args:
      exp_stoichs: list of string

    Returns:
      iterable of dataset_pb2.Molecule
    """
        cur = self._conn.cursor()
        select = (''.join([
            f'SELECT conformer '
            f'FROM {_MOLECULE_TABLE_NAME} '
            f'WHERE exp_stoich IN (', ','.join('?' for _ in exp_stoichs), ')'
        ]))
        cur.execute(select, exp_stoichs)
        return (dataset_pb2.Molecule().FromString(snappy.uncompress(result[0]))
                for result in cur)
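
A hedged sketch of the write path that the find_by_* queries in Examples #19, #26 and #32 imply: each protobuf record is serialized and snappy-compressed before it is stored in the conformer column. The INSERT statement and the helper name are assumptions; only the column names come from the queries above, and table_name would be one of the _CONFORMER_TABLE_NAME / _MOLECULE_TABLE_NAME constants used there:

import snappy

def _insert_record(conn, table_name, cid, exp_stoich, record):
    """Store one protobuf record the way the find_by_* readers expect to load it."""
    blob = snappy.compress(record.SerializeToString())
    conn.execute(
        f'INSERT INTO {table_name} (cid, exp_stoich, conformer) VALUES (?, ?, ?)',
        (cid, exp_stoich, blob))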
Example #33
    def items(self, filter=None):
        '''Yield a pair of (name, data) records in the same
        order they appear in the file.
        @p filter -- same as for get function
        '''
        records = {}

        for block_type, block_data in self._read_blocks():
            if block_type == _BLOCK_SCHEMA:
                identifier, bulk_record = self._parse_schema(
                    block_data, filter=filter)
                if identifier is None:
                    continue
                records[identifier] = bulk_record
            elif block_type == _BLOCK_DATA:
                stream = telemetry_archive.ReadStream(
                    stringio.StringIO(block_data))
                identifier = stream.read_uint32()
                flags = stream.read_uint16()

                record = records.get(identifier, None)
                if record is None:
                    continue

                if flags & BlockDataFlags.kPreviousOffset:
                    flags &= ~(BlockDataFlags.kPreviousOffset)
                    _ = stream.read_varint()
                if flags & BlockDataFlags.kSchemaCRC:
                    assert False  # not supported yet
                if flags & BlockDataFlags.kSnappy:
                    flags &= ~(BlockDataFlags.kSnappy)
                    rest = stream.stream.read()
                    stream = telemetry_archive.ReadStream(
                        stringio.StringIO(snappy.uncompress(rest)))
                assert flags == 0  # no unknown flags

                rest = stream.stream.read()
                yield record.name, record.deserialize(rest)
Example #34
def ParseBlock(blockBytes, compressed, crcBytes):
    if compressed == 1:
        blockBytes = snappy.uncompress(blockBytes)

    kvPair = dict()
    try:
        numRestarts = blockBytes[-1]
        stream2 = io.BytesIO(blockBytes[:-1 * (1 + 4 * numRestarts)])
        bContinue = True
        curKey = ''
        while (bContinue):
            sharedKeyLen = varint.decode_stream(stream2)
            inlineKeyLen = varint.decode_stream(stream2)
            valueLen = varint.decode_stream(stream2)
            inlineKey = stream2.read(inlineKeyLen)
            valData = stream2.read(valueLen)
            if len(inlineKey) >= 8:
                keyName = inlineKey[:-8]
                keySequence = int.from_bytes(inlineKey[-7:], 'little')
                keySt = inlineKey[-8]

                if sharedKeyLen != 0:
                    curKey = curKey[:sharedKeyLen] + keyName
                else:
                    curKey = keyName

                kvPair[curKey] = [keySt, keySequence, valData]

                if (keySequence == 0xffffffffffffff):
                    bContinue = False

            if inlineKeyLen == 0 and valueLen == 0:
                bContinue = False

    except Exception as e:
        print("ParseBlock exception: " + str(e))

    return kvPair
Example #35
File: Samples.py Project: alper-t/wax
def get_samples_from_doc(doc, is_compressed):
    """From a mongo document, fetch the data payload and decompress if
    necessary

    Args:
       doc (dictionary):  Document from mongodb to analyze

    Returns:
       bytes: decompressed data

    """
    data = doc['data']
    assert len(data) != 0

    if is_compressed:
        data = snappy.uncompress(data)

    data = np.fromstring(data,
                         dtype=SAMPLE_TYPE)
    if len(data) == 0:
        raise IndexError("Data has zero length")

    return data
Example #36
 def readFrom(cls, con):
     header = con.read(4)
     method = ord(header[0])
     size = (ord(header[1]) << 16) + (ord(header[2]) << 8) + (ord(
         header[3]))
     bytes = con.read(size)
     if method == Message.Encoding.Raw:
         pass
     elif method == Message.Encoding.Snappy:
         try:
             bytes = snappy.uncompress(bytes)
         except snappy.UncompressError:
             raise FramingError(FramingError.InvalidCompressedData)
         size = len(bytes)
         if size >= 1 << 24:
             raise FramingError(FramingError.MessageTooLarge)
     else:
         raise FramingError(FramingError.UnknownEncoding)
     try:
         data = msgpack.unpackb(bytes)
     except msgpack.UnpackException:
         raise FramingError(FramingError.InvalidFormatedData)
     return cls.decode(data)
Example #37
    def get_user_data(self):
        questionnaire_state = self._find_questionnaire_state()
        if questionnaire_state:
            version = questionnaire_state.version or 0

            try:
                # legacy data was stored in a dict, base64-encoded, and not compressed
                data = json.loads(questionnaire_state.state_data)['data']
                is_legacy_data = True
            except ValueError:
                data = questionnaire_state.state_data
                is_legacy_data = False

            decrypted_data = self.encrypter.decrypt_data(data)

            if is_legacy_data:
                decrypted_data = base64url_decode(
                    decrypted_data.decode()).decode()
            else:
                decrypted_data = snappy.uncompress(decrypted_data).decode()

            return decrypted_data, version

        return None, None
Example #38
    def find_by_stoichiometry(self, stoich):
        """Finds all conformers with a given stoichiometry.

    The stoichiometry is like "C6H12".

    Internally, the stoichiometry is converted a set of expanded stoichiometries
    and the query is done to find all of those.
    Notably, this means only records with status <= 512 are returned.

    Args:
      stoich: stoichiometry string like "C6H12", case doesn't matter
    Returns:
      Iterable of type dataset_pb2.Conformer.
    """
        exp_stoichs = list(
            smu_utils_lib.expanded_stoichiometries_from_stoichiometry(stoich))
        cur = self._conn.cursor()
        select = (f'SELECT conformer '
                  f'FROM {_CONFORMER_TABLE_NAME} '
                  f'WHERE exp_stoich IN (' +
                  ','.join('?' for _ in exp_stoichs) + ')')
        cur.execute(select, exp_stoichs)
        return (dataset_pb2.Conformer().FromString(snappy.uncompress(
            result[0])) for result in cur)
Example #39
def get_data_from_doc(doc):
    """From a mongo document, fetch the data payload and decompress if
    necessary

    Args:
       doc (dictionary):  Document from mongodb to analyze

    Returns:
       bytes: decompressed data

    """
    data = doc['data']
    assert len(data) != 0

    if doc['zipped']:
        data = snappy.uncompress(data)

    data = np.fromstring(data,
                         dtype=np.uint32)

    if len(data) == 0:
        raise IndexError("Data has zero length")

    return data
Example #40
 def to_python(self, value):
     return pickle.loads(snappy.uncompress(value))
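
Example #40's to_python deserializes with pickle after snappy decompression. A complementary sketch of the serialize side such a field needs, under the assumption that values round-trip exactly through these two calls:

import pickle
import snappy

def to_db_value(obj):
    """Inverse of to_python above: pickle, then snappy-compress."""
    return snappy.compress(pickle.dumps(obj))

# round trip
assert pickle.loads(snappy.uncompress(to_db_value({"answer": 42}))) == {"answer": 42}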
Example #41
import sys
import os.path

from snappy import uncompress

## box_size is the width and the height of each tile
box_size = 500
## usize is the maximum of the first tag of the snappy files
usize = 25500
## vsize is the maximum of the second tag of the snappy files
vsize = 20500

if __name__ == "__main__":
    for u in range(0, usize, box_size):
        # iterate through all snappy files in data directory
        for v in range(0, vsize, box_size):
            ## compressed files are in data directory
            file = 'data/(%d, %d).snappy' % (u, v)
            if os.path.isfile(file):
                _if = file
                ## decompressed files are in decomp_data directory
                of = 'decomp_data/(%d,%d)' % (u, v)
                with open(_if, 'rb') as f:
                    ## data copied from compressed file
                    data = f.read()
                    ## uncompressed data
                    decomp = uncompress(data)
                    with open(of, 'wb') as oof:
                        oof.write(decomp)
Example #42
    def consume_batch_async(
        self, batch: pump.Batch
    ) -> Tuple[couchbaseConstants.PUMP_ERROR, Optional[pump.SinkBatchFuture]]:
        if not self.writer:
            self.csvfile = sys.stdout
            if self.spec.startswith(CSVSink.CSV_JSON_SCHEME):
                if len(batch.msgs) <= 0:
                    future = pump.SinkBatchFuture(self, batch)
                    self.future_done(future, 0)
                    return 0, future

                cmd, vbucket_id, key, flg, exp, cas, meta, val_bytes = batch.msgs[
                    0][:8]
                doc = json.loads(val_bytes)
                self.fields = sorted(doc.keys())
                if 'id' not in self.fields:
                    self.fields = ['id'] + self.fields
                if self.spec.endswith(".csv"):
                    filename = self.get_csvfile(
                        self.spec[len(CSVSink.CSV_JSON_SCHEME):])
                    try:
                        self.csvfile = open(filename, "w", encoding='utf-8')
                    except IOError as e:
                        return f'error: could not write csv to file: {filename}', None
                self.writer = csv.writer(self.csvfile)
                self.writer.writerow(self.fields)
            else:
                if self.spec.endswith(".csv"):
                    filename = self.get_csvfile(
                        self.spec[len(CSVSink.CSV_SCHEME):])
                    try:
                        self.csvfile = open(filename, "w", encoding='utf-8')
                    except IOError as e:
                        return f'error: could not write csv to file: {filename}', None
                self.writer = csv.writer(self.csvfile)
                self.writer.writerow([
                    'id', 'flags', 'expiration', 'cas', 'value', 'rev', 'vbid',
                    'dtype'
                ])
        msg_tuple_format = 0
        for msg in batch.msgs:
            cmd, vbucket_id, key, flg, exp, cas, meta, val_bytes = msg[:8]
            if self.skip(key, vbucket_id):
                continue
            if not msg_tuple_format:
                msg_tuple_format = len(msg)
            seqno = dtype = nmeta = 0
            if msg_tuple_format > 8:
                seqno, dtype, nmeta, conf_res = msg[8:12]
            if dtype > 2:
                try:
                    val_bytes = snappy.uncompress(val_bytes)
                except Exception as err:
                    pass
            try:
                if cmd in [
                        couchbaseConstants.CMD_TAP_MUTATION,
                        couchbaseConstants.CMD_DCP_MUTATION
                ]:
                    if self.fields:
                        if val_bytes and len(val_bytes) > 0:
                            try:
                                row = []
                                doc = json.loads(val_bytes)
                                if type(doc) == dict:
                                    for field in self.fields:
                                        if field == 'id':
                                            row.append(pump.returnString(key))
                                        else:
                                            row.append(doc[field])
                                    self.writer.writerow(row)
                            except ValueError:
                                pass
                    else:
                        #rev = self.convert_meta(meta)
                        self.writer.writerow([
                            pump.returnString(key), flg, exp, cas, val_bytes,
                            meta, vbucket_id, dtype
                        ])
                elif cmd in [
                        couchbaseConstants.CMD_TAP_DELETE,
                        couchbaseConstants.CMD_DCP_DELETE
                ]:
                    pass
                elif cmd == couchbaseConstants.CMD_GET:
                    pass
                else:
                    return f'error: CSVSink - unknown cmd: {cmd!s}', None
            except IOError:
                return "error: could not write csv to stdout", None

        future = pump.SinkBatchFuture(self, batch)
        self.future_done(future, 0)
        return 0, future
Example #43
 def test_simple_compress(self):
     text = "hello world!".encode('utf-8')
     compressed = snappy.compress(text)
     self.assertEqual(text, snappy.uncompress(compressed))
Example #44
    def send_msgs(self, conn: cb_bin_client.MemcachedClient, msgs: List[couchbaseConstants.BATCH_MSG], operation: str,
                  vbucket_id: Optional[int] = None) -> couchbaseConstants.PUMP_ERROR:
        m: List[bytes] = []

        msg_format_length = 0
        for i, msg in enumerate(msgs):
            if not msg_format_length:
                msg_format_length = len(msg)
            cmd, vbucket_id_msg, key, flg, exp, cas, meta, val = msg[:8]
            seqno = dtype = nmeta = conf_res = 0
            if msg_format_length > 8:
                seqno, dtype, nmeta, conf_res = msg[8:12]
            if vbucket_id is not None:
                vbucket_id_msg = vbucket_id

            if self.skip(key, vbucket_id_msg):
                continue

            if cmd == couchbaseConstants.CMD_SUBDOC_MULTIPATH_MUTATION:
                err, req = self.format_multipath_mutation(key, val, vbucket_id_msg, cas, i)
                if err:
                    return err
                self.append_req(m, req)
                continue
            if cmd == couchbaseConstants.CMD_SUBDOC_MULTIPATH_LOOKUP:
                err, req = self.format_multipath_lookup(key, val, vbucket_id_msg, cas, i)
                if err:
                    return err
                self.append_req(m, req)
                continue

            rv, translated_cmd = self.translate_cmd(cmd, operation, meta)
            if translated_cmd is None:
                return rv
            if dtype > 2:
                if self.uncompress and val:
                    try:
                        val = snappy.uncompress(val)
                    except Exception as err:
                        pass
            if translated_cmd == couchbaseConstants.CMD_GET:
                val, flg, exp, cas = b'', 0, 0, 0
            if translated_cmd == couchbaseConstants.CMD_NOOP:
                key, val, flg, exp, cas = b'', b'', 0, 0, 0
            if translated_cmd == couchbaseConstants.CMD_DELETE:
                val = b''
            # A tombstone can contain Xattrs
            if translated_cmd == couchbaseConstants.CMD_DELETE_WITH_META and not dtype & couchbaseConstants.DATATYPE_HAS_XATTR:
                val = b''
            # on mutations filter txn related data
            if translated_cmd == couchbaseConstants.CMD_SET_WITH_META or translated_cmd == couchbaseConstants.CMD_SET:
                if not getattr(self.opts, 'force_txn', False):
                    skip, val, cas, exp, dtype = self.filter_out_txn(key, val, cas, exp, dtype)
                    if skip:
                        continue

            rv, req = self.cmd_request(translated_cmd, vbucket_id_msg, key, val,  # type: ignore
                                       ctypes.c_uint32(flg).value,
                                       exp, cas, meta, i, dtype, nmeta,
                                       conf_res)  # type: ignore
            if rv != 0:
                return rv

            self.append_req(m, req)

        if m:
            try:
                conn.s.sendall(self.join_str_and_bytes(m))  # type: ignore
            except socket.error as e:
                return f'error: conn.sendall() exception: {e}'

        return 0
Example #45
    def consume_batch_async(self, batch):
        op = self.operation()
        op_mutate = op in ['set', 'add']

        stdout = sys.stdout
        msg_visitor = None

        opts_etc = getattr(self.opts, "etc", None)
        if opts_etc:
            stdout = opts_etc.get("stdout", sys.stdout)
            msg_visitor = opts_etc.get("msg_visitor", None)

        mcd_compatible = self.opts.extra.get("mcd_compatible", 1)
        msg_tuple_format = 0
        for msg in batch.msgs:
            if msg_visitor:
                msg = msg_visitor(msg)
            if not msg_tuple_format:
                msg_tuple_format = len(msg)
            cmd, vbucket_id, key, flg, exp, cas, meta, val = msg[:8]
            seqno = dtype = nmeta = conf_res = 0
            if msg_tuple_format > 8:
                seqno, dtype, nmeta, conf_res = msg[8:]
            if self.skip(key, vbucket_id):
                continue
            if dtype > 2:
                try:
                    val = snappy.uncompress(val)
                except Exception:
                    pass
            try:
                if cmd in [
                        couchbaseConstants.CMD_TAP_MUTATION,
                        couchbaseConstants.CMD_DCP_MUTATION
                ]:
                    if op_mutate:
                        # <op> <key> <flags> <exptime> <bytes> [noreply]\r\n
                        if mcd_compatible:
                            stdout.write(
                                f'{op} {key} {flg} {exp} {len(val)!s}\r\n')
                        else:
                            stdout.write(
                                f'{op} {key} {flg} {exp} {len(val)} {seqno} {dtype} {conf_res}\r\n'
                            )

                        try:
                            stdout.write(val.decode())
                        except TypeError:
                            stdout.write(f'{val}')

                        stdout.write("\r\n")
                    elif op == 'get':
                        stdout.write(f'get {key}\r\n')
                elif cmd in [
                        couchbaseConstants.CMD_TAP_DELETE,
                        couchbaseConstants.CMD_DCP_DELETE
                ]:
                    if op_mutate:
                        stdout.write(f'delete {key}\r\n')
                elif cmd == couchbaseConstants.CMD_GET:
                    stdout.write(f'get {key}\r\n')
                else:
                    return f'error: StdOutSink - unknown cmd: {cmd!s}', None
            except IOError:
                return "error: could not write to stdout", None

        stdout.flush()
        future = SinkBatchFuture(self, batch)
        self.future_done(future, 0)
        return 0, future
Example #46
def  search(primary_keys_map,to_be_compressed_input,collection_name,tofind,MAX_RESULTS=1000):
	INDEX_DIR_DEFAULT="IndexFiles.index"
	if collection_name!="DEFAULT":
		INDEX_DIR=collection_name
	else:
		INDEX_DIR=INDEX_DIR_DEFAULT
	try:
		print "********" + tofind
		tofind_keyvalue_pairs=json.loads(tofind)
	except:
		return 100	
	direc=SimpleFSDirectory(File(INDEX_DIR))
	analyzer=StandardAnalyzer(Version.LUCENE_CURRENT)
	try:
		ireader=IndexReader.open(direc)	
		searcher=IndexSearcher(ireader)
	except:
		return 105

	#initializing return list 
	return_list=[]
	#check_list=[]
	tofind_primary_keyvalue_pairs={}
	tofind_nonprimary_keyvalue_pairs={}

	#separating out primary and non_primary keys
	for key in tofind_keyvalue_pairs.keys():
		if key in primary_keys_map:
			tofind_primary_keyvalue_pairs[key]=tofind_keyvalue_pairs[key]
		else:
			tofind_nonprimary_keyvalue_pairs[key]=tofind_keyvalue_pairs[key]

	#filtering documents		
	if len(tofind_primary_keyvalue_pairs)>0:		
		query=BooleanQuery()
		for key in tofind_primary_keyvalue_pairs.keys():
			temp=QueryParser(Version.LUCENE_CURRENT,key,analyzer).parse(tofind_primary_keyvalue_pairs[key])
			query.add(BooleanClause(temp,BooleanClause.Occur.MUST))
		hits=searcher.search(query,MAX_RESULTS).scoreDocs
		for hit in hits:
			doc=searcher.doc(hit.doc)
			if to_be_compressed_input==True:
				data=snappy.uncompress(doc.get("$DATA$"))
			else:
				data=doc.get("$DATA$")
			#non primary key filtering(without having to load all the primary key filtered values into main memory!)	
			if len(tofind_nonprimary_keyvalue_pairs)>0:
				entry=json.loads(data)
				satisfied=True
				for key in tofind_nonprimary_keyvalue_pairs.keys():
					if entry.get(key)!=tofind_nonprimary_keyvalue_pairs[key]:
						satisfied=False
						break
				if satisfied==True:
					return_list.append(data)
			else:
				return_list.append(data)
			
	else:
		for i in range(0,ireader.numDocs()):
			doc=searcher.doc(i)
			if to_be_compressed_input==True:
				data=snappy.uncompress(str(doc.get("$DATA$")))
			else:
				data=doc.get("$DATA$")

				
			#non primary key filtering(without having to load all the primary key filtered values into main memory!)	
			if len(tofind_nonprimary_keyvalue_pairs)>0:
				entry=json.loads(data)
				satisfied=True
				for key in tofind_nonprimary_keyvalue_pairs.keys():
					if entry.get(key)!=tofind_nonprimary_keyvalue_pairs[key]:
						satisfied=False
						break
				if satisfied==True:
					return_list.append(data)
			else:
				return_list.append(data)
			
	ireader.close()

	if len(return_list)==0:
		return None	
	else:
		return return_list 
Example #47
def update(primary_keys_map,to_be_compressed_input,collection_name,tofind,update,commit=False,add_field_if_not_exists=True):
	INDEX_DIR_DEFAULT="IndexFiles.index"
	#As of now the update will be implemented as search,modify data in json file,delete and re-write
	if collection_name!="DEFAULT":
		INDEX_DIR=collection_name
	else:
		INDEX_DIR=INDEX_DIR_DEFAULT
	try:
		tofind_keyvalue_pairs=json.loads(tofind)
	except:
		return 100	
	direc=SimpleFSDirectory(File(INDEX_DIR))
	analyzer=StandardAnalyzer(Version.LUCENE_CURRENT)
	try:
		ireader=IndexReader.open(direc)	
		searcher=IndexSearcher(ireader)
		#setting writer configurations
		config=IndexWriterConfig(Version.LUCENE_CURRENT,analyzer)
		config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND)
		writer=IndexWriter(direc,config)
	except:
		return 105
	no_of_documents_modified=0	
	#finding the document to update
	#Scope for making this more efficient
	def rewrite(data_string):
		data=json.loads(data_string)
		toupdate=json.loads(update)
		#primary_key_modified=False

		#delete the appropriate document
		query=BooleanQuery()
		for key in primary_keys_map:
			temp=QueryParser(Version.LUCENE_CURRENT,key,analyzer).parse(data[key])
			query.add(BooleanClause(temp,BooleanClause.Occur.MUST))
		

		#modify the values
		for key,value in toupdate.items():
			#if such a key is not present the we either add and update that key into data,or just ignore it!(By default it is set to True!)	
			if add_field_if_not_exists==False:
				if key in data.keys():
					data[key]=value
			else:		
				data[key]=value

		#this deletion statement has been intenstionally added here		
		#only if the modified data,has primary keys already not existing,will the updating process continue
		query_search=BooleanQuery()
		for key in primary_keys_map:
			temp=QueryParser(Version.LUCENE_CURRENT,key,analyzer).parse(data[key])
			query_search.add(BooleanClause(temp,BooleanClause.Occur.MUST))
		hits=searcher.search(query_search,MAX_RESULTS).scoreDocs
		if len(hits) > 0:
			return 106			
		writer.deleteDocuments(query)

		#add the newly modified document
		doc=Document()
		#index files wrt primary key
		for primary_key in primary_keys_map:
			try:
				field=Field(primary_key,data[primary_key],Field.Store.NO,Field.Index.ANALYZED)
				doc.add(field)
			except:
				# primary_keys_map.pop(collection_name)
				return 101
		#compress data using snappy if compression is on		
		if to_be_compressed_input==True:
			data_string=snappy.compress(str(json.dumps(data)))
		else:
			data_string=json.dumps(data)	
		field=Field("$DATA$",data_string,Field.Store.YES,Field.Index.ANALYZED)
		doc.add(field)
		writer.addDocument(doc)

	tofind_primary_keyvalue_pairs={}
	tofind_nonprimary_keyvalue_pairs={}

	#separating out primary and non_primary keys
	for key in tofind_keyvalue_pairs.keys():
		if key in primary_keys_map:
			tofind_primary_keyvalue_pairs[key]=tofind_keyvalue_pairs[key]
		else:
			tofind_nonprimary_keyvalue_pairs[key]=tofind_keyvalue_pairs[key]

	#filtering documents		
	if len(tofind_primary_keyvalue_pairs)>0:		
		query=BooleanQuery()
		for key in tofind_primary_keyvalue_pairs.keys():
			temp=QueryParser(Version.LUCENE_CURRENT,key,analyzer).parse(tofind_primary_keyvalue_pairs[key])
			query.add(BooleanClause(temp,BooleanClause.Occur.MUST))
		hits=searcher.search(query,MAX_RESULTS).scoreDocs
		
		for hit in hits:
			doc=searcher.doc(hit.doc)
			if to_be_compressed_input==True:
				data=snappy.uncompress(doc.get("$DATA$"))
			else:
				data=doc.get("$DATA$")
			#non primary key filtering(without having to load all the primary key filtered values into main memory!)	
			if len(tofind_nonprimary_keyvalue_pairs)>0:
				entry=json.loads(data)
				satisfied=True
				for key in tofind_nonprimary_keyvalue_pairs.keys():
					if entry.get(key)!=tofind_nonprimary_keyvalue_pairs[key]:
						satisfied=False
						break
				if satisfied==True:
					if rewrite(data)!=106:
						no_of_documents_modified+=1
					else:
						writer.rollback()
						return 106	
			else:
				if rewrite(data)!=106:
					no_of_documents_modified+=1
				else:
					writer.rollback()
					return 106
				
			
	else:
		for i in range(0,ireader.numDocs()):
			doc=searcher.doc(i)
			if to_be_compressed_input==True:
				data=snappy.uncompress(doc.get("$DATA$"))
			else:
				data=doc.get("$DATA$")
			#non primary key filtering(without having to load all the primary key filtered values into main memory!)	
			if len(tofind_nonprimary_keyvalue_pairs)>0:
				entry=json.loads(data)
				satisfied=True
				for key in tofind_nonprimary_keyvalue_pairs.keys():
					if entry.get(key)!=tofind_nonprimary_keyvalue_pairs[key]:
						satisfied=False
						break
				if satisfied==True:
					if rewrite(data)!=106:
						no_of_documents_modified+=1
					else:
						writer.rollback()
						return 106
			else:
				if rewrite(data)!=106:
					no_of_documents_modified+=1
				else:
					writer.rollback()
					return 106
			
	
	ireader.close()
	if commit==True:
			writer.commit()
	writer.close()
	return str(no_of_documents_modified)+" have been modified"
Example #48
 def test_unicode_compress(self):
     text = "hello unicode world!".decode('utf-8')
     compressed = snappy.compress(text)
     self.assertEqual(text, snappy.uncompress(compressed))
Example #49
 def test_valid_compressed_buffer(self):
     text = "hello world!".encode('utf-8')
     compressed = snappy.compress(text)
     uncompressed = snappy.uncompress(compressed)
     self.assertEqual(text == uncompressed,
                      snappy.isValidCompressed(compressed))
Example #50
 def getImageFeatures(self, image , image_shape):
     image = np.fromstring(snappy.uncompress(image), dtype=np.float32)
     image.resize(image_shape)
     feature_dic = self.extractor.getImageFeatures(image)
     feature_dic = {layer:snappy.compress(features) for layer,features in feature_dic.items()}
     return feature_dic
Example #51
 def test_randombytes2_compress(self):
     _bytes = bytes(os.urandom(10000))
     compressed = snappy.compress(_bytes)
     self.assertEqual(_bytes, snappy.uncompress(compressed))
Example #52
def DXHTTPRequest(resource, data, method='POST', headers={}, auth=True, timeout=600,
                  use_compression=None, jsonify_data=True, want_full_response=False,
                  prepend_srv=True, session_handler=None,
                  max_retries=DEFAULT_RETRIES, always_retry=False, **kwargs):
    '''
    :param resource: API server route, e.g. "/record/new"
    :type resource: string
    :param data: Content of the request body
    :type data: list or dict, if *jsonify_data* is True; or string or file-like object, otherwise
    :param headers: Names and values of HTTP headers to submit with the request (in addition to those needed for authentication, compression, or other options specified with the call).
    :type headers: dict
    :param auth: Overrides the *auth* value to pass through to :meth:`requests.request`. By default a token is obtained from the ``DX_SECURITY_CONTEXT``.
    :type auth: tuple, object, True (default), or None
    :param timeout: HTTP request timeout, in seconds
    :type timeout: float
    :param config: *config* value to pass through to :meth:`requests.request`
    :type config: dict
    :param use_compression: "snappy" to use Snappy compression, or None
    :type use_compression: string or None
    :param jsonify_data: If True, *data* is converted from a Python list or dict to a JSON string
    :type jsonify_data: boolean
    :param want_full_response: If True, the full :class:`requests.Response` object is returned (otherwise, only the content of the response body is returned)
    :type want_full_response: boolean
    :param prepend_srv: If True, prepends the API server location to the URL
    :type prepend_srv: boolean
    :param max_retries: Maximum number of retries to perform for a request. A "failed" request is retried if any of the following is true:

                        - A response is received from the server, and the content length received does not match the "Content-Length" header.
                        - A response is received from the server, and the response has an HTTP status code in 5xx range.
                        - A response is received from the server, the "Content-Length" header is not set, and the response JSON cannot be parsed.
                        - No response is received from the server, and either *always_retry* is True or the request *method* is "GET".

    :type max_retries: int
    :param always_retry: If True, indicates that it is safe to retry a request on failure

                        - Note: It is not guaranteed that the request will *always* be retried on failure; rather, this is an indication to the function that it would be safe to do so.

    :type always_retry: boolean
    :returns: Response from API server in the format indicated by *want_full_response*. Note: if *want_full_response* is set to False and the header "content-type" is found in the response with value "application/json", the body of the response will **always** be converted from JSON to a Python list or dict before it is returned.
    :raises: :exc:`DXAPIError` if the server returned a non-200 status code; :exc:`requests.exceptions.HTTPError` if an invalid response was received from the server; or :exc:`requests.exceptions.ConnectionError` if a connection cannot be established.

    Wrapper around :meth:`requests.request()` that makes an HTTP
    request, inserting authentication headers and (by default)
    converting *data* to JSON.

    .. note:: Bindings methods that make API calls make the underlying
       HTTP request(s) using :func:`DXHTTPRequest`, and most of them
       will pass any unrecognized keyword arguments you have supplied
       through to :func:`DXHTTPRequest`.

    '''
    if session_handler is None:
        session_handler = SESSION_HANDLERS[os.getpid()]

    global _UPGRADE_NOTIFY

    url = APISERVER + resource if prepend_srv else resource
    method = method.upper() # Convert method name to uppercase, to ease string comparisons later
    if _DEBUG:
        from repr import Repr
        print >>sys.stderr, method, url, "=>", Repr().repr(data)

    if auth is True:
        auth = AUTH_HELPER

    # When *data* is bytes but *headers* contains Unicode strings, httplib tries to concatenate them and decode *data*,
    # which should not be done. Also, per HTTP/1.1 headers must be encoded with MIME, but we'll disregard that here, and
    # just encode them with the Python default (ascii) and fail for any non-ascii content.
    headers = {k.encode(): v.encode() for k, v in headers.iteritems()}

    # This will make the total number of retries MAX_RETRIES^2 for some errors. TODO: check how to better integrate with requests retry logic.
    # config.setdefault('max_retries', MAX_RETRIES)
    if jsonify_data:
        data = json.dumps(data)
        if 'Content-Type' not in headers and method == 'POST':
            headers['Content-Type'] = 'application/json'

    # If the input is a buffer, its data gets consumed by
    # requests.request (moving the read position). Record the initial
    # buffer position so that we can return to it if the request fails
    # and needs to be retried.
    rewind_input_buffer_offset = None
    if hasattr(data, 'seek') and hasattr(data, 'tell'):
        rewind_input_buffer_offset = data.tell()

    headers['DNAnexus-API'] = API_VERSION
    headers['User-Agent'] = USER_AGENT

    if use_compression == 'snappy':
        if not snappy_available:
            raise DXError("Snappy compression requested, but the snappy module is unavailable")
        headers['accept-encoding'] = 'snappy'

    if 'verify' not in kwargs and 'DX_CA_CERT' in os.environ:
        kwargs['verify'] = os.environ['DX_CA_CERT']
        if os.environ['DX_CA_CERT'] == 'NOVERIFY':
            kwargs['verify'] = False

    response, last_error = None, None
    for retry in range(max_retries + 1):
        streaming_response_truncated = False
        try:
            response = session_handler.request(method, url, data=data, headers=headers, timeout=timeout, auth=auth,
                                               **kwargs)

            if _UPGRADE_NOTIFY and response.headers.get('x-upgrade-info', '').startswith('A recommended update is available') and not os.environ.has_key('_ARGCOMPLETE'):
                logger.info(response.headers['x-upgrade-info'])
                try:
                    with file(_UPGRADE_NOTIFY, 'a'):
                        os.utime(_UPGRADE_NOTIFY, None)
                except:
                    pass
                _UPGRADE_NOTIFY = False

            if _DEBUG:
                print >>sys.stderr, method, url, "<=", response.status_code, Repr().repr(response.content)

            # If HTTP code that is not 200 (OK) is received and the content is
            # JSON, parse it and throw the appropriate error.  Otherwise,
            # raise the usual exception.
            if response.status_code != requests.codes.ok:
                # response.headers key lookup is case-insensitive
                if response.headers.get('content-type', '').startswith('application/json'):
                    content = json.loads(response.content)
                    raise DXAPIError(content,
                                     response.status_code)
                response.raise_for_status()

            if want_full_response:
                return response
            else:
                if 'content-length' in response.headers:
                    if int(response.headers['content-length']) != len(response.content):
                        raise ContentLengthError("Received response with content-length header set to %s but content length is %d"
                            % (response.headers['content-length'], len(response.content)))

                if use_compression and response.headers.get('content-encoding', '') == 'snappy':
                    # TODO: check if snappy raises any exceptions on truncated response content
                    decoded_content = snappy.uncompress(response.content)
                else:
                    decoded_content = response.content

                if response.headers.get('content-type', '').startswith('application/json'):
                    try:
                        return json.loads(decoded_content)
                    except ValueError:
                        # If a streaming API call (no content-length
                        # set) encounters an error it may just halt the
                        # response because it has no other way to
                        # indicate an error. Under these circumstances
                        # the client sees unparseable JSON, and we
                        # should be able to recover.
                        streaming_response_truncated = 'content-length' not in response.headers
                        raise HTTPError("Invalid JSON received from server")
                return decoded_content
        except (DXAPIError, ConnectionError, HTTPError, Timeout, httplib.HTTPException) as e:
            last_error = e

            # TODO: support HTTP/1.1 503 Retry-After
            # TODO: if the socket was dropped mid-request, ConnectionError or httplib.IncompleteRead is raised,
            # but non-idempotent requests can be unsafe to retry
            # Distinguish between connection initiation errors and dropped socket errors
            if retry < max_retries:
                if (response is None) or isinstance(e, ContentLengthError):
                    ok_to_retry = always_retry or (method == 'GET')
                else:
                    ok_to_retry = (response.status_code >= 500 and response.status_code < 600) or streaming_response_truncated

                if ok_to_retry:
                    if rewind_input_buffer_offset is not None:
                        data.seek(rewind_input_buffer_offset)
                    delay = 2 ** (retry+1)
                    logger.warn("%s %s: %s. Waiting %d seconds before retry %d of %d..." % (method, url, str(e), delay, retry+1, max_retries))
                    time.sleep(delay)
                    continue
            break
    if last_error is None:
        last_error = DXError("Internal error in DXHTTPRequest")
    raise last_error
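
The example above layers authentication, compression, and JSON handling on top of a simple retry loop. A minimal sketch of just the exponential-backoff pattern it uses (delays of 2, 4, 8, ... seconds between attempts) is shown below; request_with_retries and do_request are hypothetical names for illustration, not part of dxpy.

import time

def request_with_retries(do_request, max_retries=5, always_retry=False, method="GET"):
    last_error = None
    for retry in range(max_retries + 1):
        try:
            return do_request()
        except Exception as e:  # the real code catches specific transport/API errors
            last_error = e
            if retry < max_retries and (always_retry or method == "GET"):
                time.sleep(2 ** (retry + 1))  # 2, 4, 8, ... seconds, as in the example
                continue
            break
    raise last_error
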
示例#53
0
def uncompress_array(compressed: CompressedArray) -> np.ndarray:
    """Uncompresses a numpy array with snappy given its shape and dtype."""
    compressed_array, shape, dtype = compressed
    byte_string = snappy.uncompress(compressed_array)
    return np.frombuffer(byte_string, dtype=dtype).reshape(shape)
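
A plausible compressing counterpart is sketched below, assuming CompressedArray is simply the (bytes, shape, dtype) tuple unpacked above; compress_array is not part of the original source.

import numpy as np
import snappy

def compress_array(array: np.ndarray):
    """Compresses a numpy array with snappy, keeping shape and dtype for the round trip."""
    return (snappy.compress(array.tobytes()), array.shape, array.dtype)

# Round trip:
# original = np.arange(12, dtype=np.float32).reshape(3, 4)
# assert np.array_equal(original, uncompress_array(compress_array(original)))
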
示例#54
0
# RNG
import os
import secrets
def get_salt(length):
    # return random.randbytes(length)
    # return secrets.token_bytes(length)
    return os.urandom(length)

# compression
# pip install python-snappy
import snappy

if __name__ == '__main__':
    toc = b"jim"
    compressed = snappy.compress(toc)
    print('comp', len(toc), len(compressed))
    print(snappy.uncompress(compressed))

    toc = toc * 20
    compressed = snappy.compress(toc)
    print('comp', len(toc), len(compressed), type(compressed))

# use compression only if gets smaller.
def compress(p):
    checksize(p)
    c = snappy.compress(p)
    return (c if len(c) < len(p) else p)

def checksize(ba):
    if len(ba) > 1024:
        raise Exception('input too large (we have to protect our servers)')
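
Because compress() above returns the input unchanged whenever compression does not help, the reader needs some way to tell the two cases apart. One common approach, sketched here as an assumption rather than part of the original snippet, is a one-byte flag prefix:

def compress_tagged(p):
    checksize(p)
    c = snappy.compress(p)
    # b'\x01' marks a compressed payload, b'\x00' an uncompressed one
    return (b'\x01' + c) if len(c) < len(p) else (b'\x00' + p)

def decompress_tagged(blob):
    flag, payload = blob[:1], blob[1:]
    return snappy.uncompress(payload) if flag == b'\x01' else payload
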
示例#55
0
def parse(kind, compressed, message):
    if compressed:
        message = snappy.uncompress(message)

    return protobuf.parse(IMPL_BY_KIND[kind], message)
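
A hypothetical inverse of parse() might look like the sketch below: serialize the protobuf message (SerializeToString is the standard protobuf API) and compress it with snappy only when that actually shrinks the payload, returning a flag that mirrors the *compressed* argument above. None of this is taken from the original project.

import snappy

def serialize(message):
    raw = message.SerializeToString()
    packed = snappy.compress(raw)
    if len(packed) < len(raw):
        return True, packed   # compressed=True
    return False, raw         # compressed=False
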
示例#56
0
 def test_moredata_compress(self):
     text = "snappy +" * 1000 + " " + "by " * 1000 + " google"
     text = text.encode('utf-8')
     compressed = snappy.compress(text)
     self.assertEqual(text, snappy.uncompress(compressed))
示例#57
0
 def run_call(self, image):
     client = msgpackrpc.Client(msgpackrpc.Address(self.hostname, 18800))
     feature_dic = client.call('getImageFeatures', snappy.compress(image), image.shape)
     feature_dic = {layer:np.fromstring(snappy.uncompress(features), dtype=np.float32) for layer,features in feature_dic.items()}
     labels = client.call('getImageLabels')
     return feature_dic, labels
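
Note that np.fromstring is deprecated for binary input in newer NumPy releases; np.frombuffer is the drop-in equivalent. A self-contained sketch of the decoding step, assuming feature_dic maps layer names to snappy-compressed float32 byte strings as above:

import numpy as np
import snappy

def decode_features(feature_dic):
    # layer name -> snappy-compressed float32 bytes, as returned by the RPC call above
    return {layer: np.frombuffer(snappy.uncompress(features), dtype=np.float32)
            for layer, features in feature_dic.items()}
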
示例#58
0
            def decompress(self, data):
                """ Decompress data using a snappy decompressor.

                :return: data as a bytes object.
                """
                return snappy.uncompress(data)
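
snappy.uncompress handles the raw block format used throughout these examples. If data arrives in snappy's framed/stream format instead, python-snappy provides (in the versions I have seen) a StreamDecompressor class for incremental decoding; a rough sketch, to be checked against the installed version:

import snappy

def decompress_stream(chunks):
    # Incrementally decode framed snappy data; assumes snappy.StreamDecompressor
    # exists in the installed python-snappy release.
    decompressor = snappy.StreamDecompressor()
    return b"".join(decompressor.decompress(chunk) for chunk in chunks)
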
示例#59
0
 def test_randombytes_compress(self):
     _bytes = repr(os.urandom(1000)).encode('utf-8')
     compressed = snappy.compress(_bytes)
     self.assertEqual(_bytes, snappy.uncompress(compressed))