Пример #1
0
 def _read_block_header(self):
     """Read the header of the next block and select its datum decoder.

     Stores the count read from the raw decoder in ``self.block_count``
     and sets ``self._datum_decoder`` to a decoder over the block's
     payload, decompressing it first when ``self.codec`` requires it.

     Raises:
         DataFileException: if ``self.codec`` is not one of the codecs
             handled here.
     """
     self.block_count = self.raw_decoder.read_long()
     codec = self.codec

     if codec == "null":
         # No compression: the length that follows is not needed, so it
         # is skipped and datums are decoded straight from the raw stream.
         self.raw_decoder.skip_long()
         self._datum_decoder = self._raw_decoder
         return

     if codec == 'deflate':
         # The payload is stored as (length, data), i.e. exactly how the
         # "bytes" type is encoded. The -15 is the log of the window
         # size; a negative value requests "raw" inflation with no zlib
         # headers (see zlib.h).
         payload = self.raw_decoder.read_bytes()
         self._datum_decoder = io.BinaryDecoder(
             StringIO(zlib.decompress(payload, -15)))
         return

     if codec == 'snappy':
         # The stored length includes a 4-byte CRC32 checksum, so only
         # (length - 4) bytes are handed to the decompressor.
         stored_length = self.raw_decoder.read_long()
         payload = self.raw_decoder.read(stored_length - 4)
         plain = snappy.decompress(payload)
         self._datum_decoder = io.BinaryDecoder(StringIO(plain))
         self.raw_decoder.check_crc32(plain)
         return

     if codec == 'xz':
         # Same (length, data) framing as the "bytes" type.
         payload = self.raw_decoder.read_bytes()
         self._datum_decoder = io.BinaryDecoder(
             StringIO(lzma.decompress(payload)))
         return

     raise DataFileException("Unknown codec: %r" % codec)
Пример #2
0
def check_skip_number(number_type):
    """Check that an encoded number can be skipped over.

    For each entry of ``BINARY_ENCODINGS`` the entry's value is written
    using a ``number_type`` schema, followed by a known sentinel value.
    The first value is then skipped with ``skip_long`` and the next datum
    is read back.

    Returns the number of entries for which the datum read back equals
    the sentinel.
    """
    sentinel = 6253
    print_name('TEST SKIP %s' % number_type.upper())
    matches = 0
    for skipped, _hex_encoding in BINARY_ENCODINGS:
        print('Value to Skip: %d' % skipped)

        # Encode the value to be skipped, then the sentinel right after it.
        number_schema = schema.parse('"%s"' % number_type.lower())
        out, encoder, datum_writer = write_datum(skipped, number_schema)
        datum_writer.write(sentinel, encoder)

        # Step over the first value in a fresh decoder.
        binary_decoder = io.BinaryDecoder(StringIO(out.getvalue()))
        binary_decoder.skip_long()

        # Whatever comes next should be the sentinel.
        actual = io.DatumReader(number_schema).read(binary_decoder)

        print('Read Value: %d' % actual)
        if actual == sentinel:
            matches += 1
        print('')
    return matches
Пример #3
0
 def _process_handshake(self, call_response, message_name, request_datum):
     """Decode a serialized reply: handshake first, then the call response.

     If ``read_handshake_response`` returns a false value, the request is
     issued again through ``self.request``; otherwise the call response
     is read from the same decoder and returned.
     """
     decoder = io.BinaryDecoder(StringIO(call_response))
     if not self.read_handshake_response(decoder):
         # No call response to read from this reply: send the request again.
         return self.request(message_name, request_datum)
     return self.read_call_response(message_name, decoder)
Пример #4
0
    def issue_request(self, call_request, message_name, request_datum):
        """Send a serialized call through the transceiver and decode the reply.

        The reply's handshake is processed first. If
        ``read_handshake_response`` returns a false value, the request is
        issued again through ``self.request``; otherwise the call response
        is read from the same decoder and returned.
        """
        reply = self.transceiver.transceive(call_request)
        decoder = io.BinaryDecoder(StringIO(reply))

        if not self.read_handshake_response(decoder):
            # No call response to read from this reply: send the request again.
            return self.request(message_name, request_datum)
        return self.read_call_response(message_name, decoder)
Пример #5
0
  def test_no_default_value(self):
    """Reading with a reader field that has no default must fail.

    The reader's schema declares an int field "H" without a default
    value. Decoding a LONG_RECORD_DATUM written with LONG_RECORD_SCHEMA
    against it is expected to raise SchemaResolutionException
    (presumably because the writer's record has no field "H" -- confirm
    against LONG_RECORD_SCHEMA).
    """
    print_name('TEST NO DEFAULT VALUE')
    writers_schema = LONG_RECORD_SCHEMA
    datum_to_write = LONG_RECORD_DATUM

    readers_schema = schema.parse("""\
      {"type": "record", "name": "Test",
       "fields": [{"name": "H", "type": "int"}]}""")

    writer, _encoder, _datum_writer = write_datum(datum_to_write,
                                                  writers_schema)
    reader = StringIO(writer.getvalue())
    decoder = io.BinaryDecoder(reader)
    datum_reader = io.DatumReader(writers_schema, readers_schema)

    # Only the read performs schema resolution, so it is the only
    # statement allowed to raise; without it the assertion cannot pass.
    with self.assertRaises(io.SchemaResolutionException):
        datum_reader.read(decoder)
Пример #6
0
  def test_unknown_symbol(self):
    """Reading an enum symbol unknown to the reader's schema must fail.

    The datum 'FOO' is written with an enum schema whose symbols are
    FOO and BAR, then read with a reader's enum schema whose symbols are
    BAR and BAZ. The read is expected to raise SchemaResolutionException.
    """
    print_name('TEST UNKNOWN SYMBOL')
    writers_schema = schema.parse("""\
      {"type": "enum", "name": "Test",
       "symbols": ["FOO", "BAR"]}""")
    datum_to_write = 'FOO'

    readers_schema = schema.parse("""\
      {"type": "enum", "name": "Test",
       "symbols": ["BAR", "BAZ"]}""")

    # Writing uses only the writer's schema and is not expected to fail,
    # so it stays outside the assertRaises block.
    writer, _encoder, _datum_writer = write_datum(datum_to_write,
                                                  writers_schema)
    reader = StringIO(writer.getvalue())
    decoder = io.BinaryDecoder(reader)
    datum_reader = io.DatumReader(writers_schema, readers_schema)

    # Only the read performs schema resolution, so it is the only
    # statement allowed to raise; without it the assertion cannot pass.
    with self.assertRaises(io.SchemaResolutionException):
        datum_reader.read(decoder)
Пример #7
0
    def input(self, data, count):
        """Receive input from the server and process it record by record.

        Parameters
        ------------------------------------------------------
        data  - should contain the bytes encoding the serialized data
                (presumably passed in as a string -- confirm with caller)
        count - how many input records are provided in the binary stream

        Any exception raised while decoding or processing is logged as a
        warning and reported through ``self.fail`` with the formatted
        traceback; it is not re-raised.
        """
        try:
            # Wrap the payload so it can be read with avio.BinaryDecoder.
            decoder = avio.BinaryDecoder(StringIO(data))

            for _ in range(count):
                if self.taskType == TaskType.MAP:
                    in_record = self.inReader.read(decoder)

                    # NOTE(review): midCollector is an instance attribute,
                    # so passing it explicitly may be redundant.
                    self.map(in_record, self.midCollector)

                elif self.taskType == TaskType.REDUCE:
                    # Remember the previous record before reading the new one.
                    prev = self.midRecord
                    self.midRecord = self.midReader.read(decoder)

                    key_changed = prev is not None and not keys_are_equal(
                        self.midRecord, prev, self._red_fkeys)
                    if key_changed:
                        # The key has changed, so finalize the processing
                        # for the previous group of key,value pairs.
                        self.reduceFlush(prev, self.outCollector)
                    self.reduce(self.midRecord, self.outCollector)

        except Exception:
            estr = traceback.format_exc()
            self.log.warning("failing: " + estr)
            self.fail(estr)
Пример #8
0
    def __init__(self, reader, datum_reader):
        """Prepare to read data blocks from ``reader``.

        Wraps ``reader`` in a binary decoder, reads the file header,
        validates the codec named in the metadata (a missing codec is
        treated as "null"), records the file length and installs the
        writer's schema from the metadata on the datum reader.

        Raises:
            DataFileException: if the codec is not in ``VALID_CODECS``.
        """
        self._reader = reader
        self._raw_decoder = io.BinaryDecoder(reader)
        self._datum_decoder = None  # Maybe reset at every block.
        self._datum_reader = datum_reader

        # The header (magic, meta, sync) must be read before any
        # metadata lookup below.
        self._read_header()

        # Resolve the codec, defaulting to "null" when none is recorded.
        codec = self.get_meta(CODEC_KEY)
        if codec is None:
            codec = "null"
        self.codec = codec
        if codec not in VALID_CODECS:
            raise DataFileException('Unknown codec: %s.' % codec)

        self._file_length = self.determine_file_length()

        # Nothing read from any block yet; install the writer's schema.
        self._block_count = 0
        writers_schema_json = self.get_meta(SCHEMA_KEY)
        self.datum_reader.writers_schema = schema.parse(writers_schema_json)
Пример #9
0
def read_datum(buffer, writers_schema, readers_schema=None):
    """Decode and return one datum from the contents of ``buffer``.

    ``buffer`` must provide ``getvalue()``. Its contents are decoded with
    a ``DatumReader`` built from ``writers_schema`` and the optional
    ``readers_schema``.
    """
    binary_decoder = io.BinaryDecoder(StringIO(buffer.getvalue()))
    return io.DatumReader(writers_schema, readers_schema).read(binary_decoder)
Пример #10
0
    def respond(self, call_request):
        """
        Called by a server to deserialize a request, compute and serialize
        a response or error. Compare to 'handle()' in Thrift.

        ``call_request`` is the serialized request. The return value is
        the serialized reply taken from the output buffer:

        * if ``process_handshake`` returns None, whatever it wrote;
        * otherwise that output followed by the response metadata, an
          error flag and the response or error;
        * if an ``AvroException`` is raised along the way, a system error
          written to a fresh buffer, preceded by the post-handshake
          output when the handshake had already succeeded.
        """
        buffer_reader = StringIO(call_request)
        buffer_decoder = io.BinaryDecoder(buffer_reader)
        buffer_writer = StringIO()
        buffer_encoder = io.BinaryEncoder(buffer_writer)
        error = None
        response_metadata = {}
        # Output produced by a successful handshake. It is kept so that a
        # later system error can still be sent after it.
        handshake_output = None

        try:
            remote_protocol = self.process_handshake(buffer_decoder,
                                                     buffer_encoder)
            # handshake failure
            if remote_protocol is None:
                return buffer_writer.getvalue()
            handshake_output = buffer_writer.getvalue()

            # read request using remote protocol
            request_metadata = META_READER.read(buffer_decoder)
            remote_message_name = buffer_decoder.read_utf8()

            # get remote and local request schemas so we can do
            # schema resolution (one fine day)
            remote_message = remote_protocol.messages.get(remote_message_name)
            if remote_message is None:
                fail_msg = 'Unknown remote message: %s' % remote_message_name
                raise schema.AvroException(fail_msg)
            local_message = self.local_protocol.messages.get(
                remote_message_name)
            if local_message is None:
                fail_msg = 'Unknown local message: %s' % remote_message_name
                raise schema.AvroException(fail_msg)
            writers_schema = remote_message.request
            readers_schema = local_message.request
            request = self.read_request(writers_schema, readers_schema,
                                        buffer_decoder)

            # perform server logic
            try:
                response = self.invoke(local_message, request)
            except AvroRemoteException as e:
                error = e
            except Exception as e:
                error = AvroRemoteException(str(e))

            # write response using local protocol
            META_WRITER.write(response_metadata, buffer_encoder)
            buffer_encoder.write_boolean(error is not None)
            if error is None:
                writers_schema = local_message.response
                self.write_response(writers_schema, response, buffer_encoder)
            else:
                writers_schema = local_message.errors
                self.write_error(writers_schema, error, buffer_encoder)
        except schema.AvroException as e:
            error = AvroRemoteException(str(e))
            # Start from a fresh buffer so that any partially written
            # response is dropped, and return THAT buffer. Previously the
            # error went into a discarded StringIO and the stale original
            # buffer was returned instead.
            error_writer = StringIO()
            if handshake_output is not None:
                error_writer.write(handshake_output)
            error_encoder = io.BinaryEncoder(error_writer)
            META_WRITER.write(response_metadata, error_encoder)
            error_encoder.write_boolean(True)
            self.write_error(SYSTEM_ERROR_SCHEMA, error, error_encoder)
            return error_writer.getvalue()
        return buffer_writer.getvalue()