def decode_from_stream(self, in_stream, nested):
    row_kind_value, fields = self._decode_one_row_from_stream(
        in_stream, nested)
    row = Row(*fields)
    row.set_field_names(self.field_names)
    row.set_row_kind(RowKind(row_kind_value))
    return row
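# Hedged sketch of the Row assembly performed above (assumes a PyFlink
# installation; the field values and names are made up for illustration).
# RowKind's integer values follow Flink's changelog encoding, where 3 is
# DELETE:
#
#     from pyflink.common.types import Row, RowKind
#
#     row = Row(1, 'hello')
#     row.set_field_names(['id', 'payload'])
#     row.set_row_kind(RowKind(3))  # RowKind.DELETE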
def convert_to_python_obj(data, type_info):
    if type_info == Types.PICKLED_BYTE_ARRAY():
        return pickle.loads(data)
    elif isinstance(type_info, ExternalTypeInfo):
        return convert_to_python_obj(data, type_info._type_info)
    else:
        gateway = get_gateway()
        pickle_bytes = gateway.jvm.PythonBridgeUtils. \
            getPickledBytesFromJavaObject(data, type_info.get_java_type_info())
        if isinstance(type_info, (RowTypeInfo, TupleTypeInfo)):
            # pickle_bytes[0] carries the row kind; the remaining elements
            # are the pickled fields, matched positionally with their types.
            field_data = zip(list(pickle_bytes[1:]), type_info.get_field_types())
            fields = []
            for field_bytes, field_type in field_data:
                if len(field_bytes) == 0:
                    # an empty byte string marks a null field
                    fields.append(None)
                else:
                    fields.append(
                        pickled_bytes_to_python_converter(field_bytes, field_type))
            if isinstance(type_info, RowTypeInfo):
                return Row.of_kind(
                    RowKind(int.from_bytes(pickle_bytes[0], 'little')), *fields)
            else:
                return tuple(fields)
        else:
            return pickled_bytes_to_python_converter(pickle_bytes, type_info)
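# Illustrative, self-contained sketch (hypothetical payload, no JVM needed)
# of the pickle_bytes layout the Row/Tuple branch above consumes: element 0
# is the little-endian row-kind byte, each following element is one pickled
# field, and an empty byte string marks a null field.
import pickle

pickle_bytes = [b'\x00', pickle.dumps(42), b'']
assert int.from_bytes(pickle_bytes[0], 'little') == 0  # RowKind.INSERT
fields = [None if len(b) == 0 else pickle.loads(b) for b in pickle_bytes[1:]]
assert fields == [42, None]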
def to_internal(self, value) -> IN:
    if value is None:
        return None
    row = Row()
    row._values = [self._field_data_converters[i].to_internal(item)
                   for i, item in enumerate(value[1])]
    row.set_field_names(self._field_names)
    row.set_row_kind(RowKind(value[0]))
    return row
def to_internal(self, value) -> IN:
    if value is None:
        return None
    # reuse a single cached Row instead of allocating one per record
    self._reuse_row._values = [
        self._field_data_converters[i].to_internal(item)
        for i, item in enumerate(value[1])
    ]
    self._reuse_row.set_row_kind(RowKind(value[0]))
    return self._reuse_row
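# Design note: unlike the allocating converter above, this variant recycles
# one cached Row, so each call invalidates the previous result; callers that
# need to keep a record must copy it. A minimal, self-contained sketch of
# the pattern (plain Python, illustrative names only):
class _ReusingDecoder:
    def __init__(self):
        self._reuse = []  # stands in for self._reuse_row

    def decode(self, values):
        self._reuse[:] = values  # overwrite in place, no new allocation
        return self._reuse

decoder = _ReusingDecoder()
first = decoder.decode([1, 2])
second = decoder.decode([3, 4])
assert first is second and first == [3, 4]  # `first` was overwritten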
def pickled_bytes_to_python_converter(data, field_type):
    if isinstance(field_type, RowTypeInfo):
        # data[0] carries the row kind; the remaining elements are the
        # encoded fields, matched positionally with their types.
        row_kind = RowKind(int.from_bytes(data[0], 'little'))
        field_data = zip(list(data[1:]), field_type.get_field_types())
        fields = [pickled_bytes_to_python_converter(d, d_type)
                  for d, d_type in field_data]
        return Row.of_kind(row_kind, *fields)
    else:
        data = pickle.loads(data)
        if field_type == Types.SQL_TIME():
            # decode microseconds since midnight into a datetime.time
            seconds, microseconds = divmod(data, 10**6)
            minutes, seconds = divmod(seconds, 60)
            hours, minutes = divmod(minutes, 60)
            return datetime.time(hours, minutes, seconds, microseconds)
        elif field_type == Types.SQL_DATE():
            return field_type.from_internal_type(data)
        elif field_type == Types.SQL_TIMESTAMP():
            return field_type.from_internal_type(int(data.timestamp() * 10**6))
        elif field_type == Types.FLOAT():
            return field_type.from_internal_type(ast.literal_eval(data))
        elif isinstance(field_type,
                        (BasicArrayTypeInfo, PrimitiveArrayTypeInfo,
                         ObjectArrayTypeInfo)):
            element_type = field_type._element_type
            return [pickled_bytes_to_python_converter(element_bytes, element_type)
                    for element_bytes in data]
        elif isinstance(field_type, MapTypeInfo):
            # data is a pair of parallel sequences: keys, then values
            key_type = field_type._key_type_info
            value_type = field_type._value_type_info
            return {pickled_bytes_to_python_converter(k, key_type):
                    pickled_bytes_to_python_converter(v, value_type)
                    for k, v in zip(data[0], data[1])}
        elif isinstance(field_type, ListTypeInfo):
            element_type = field_type.elem_type
            return [pickled_bytes_to_python_converter(element_bytes, element_type)
                    for element_bytes in data]
        else:
            return field_type.from_internal_type(data)
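# Standalone sanity check of the SQL_TIME divmod cascade above (assumed
# internal encoding: microseconds since midnight, which is what the branch
# expects; the example value is made up):
import datetime

micros_since_midnight = (13 * 3600 + 14 * 60 + 15) * 10**6 + 16
seconds, microseconds = divmod(micros_since_midnight, 10**6)
minutes, seconds = divmod(seconds, 60)
hours, minutes = divmod(minutes, 60)
assert datetime.time(hours, minutes, seconds, microseconds) == \
    datetime.time(13, 14, 15, 16)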
def decode_from_stream(self, in_stream, length=0) -> Row:
    # the mask packs the row kind bits followed by one null flag per field
    row_kind_and_null_mask = self._mask_utils.read_mask(in_stream)
    fields = [None if row_kind_and_null_mask[idx + ROW_KIND_BIT_SIZE]
              else self._field_coders[idx].decode_from_stream(in_stream)
              for idx in range(0, self._field_count)]

    # compute the row_kind value from the leading bits (low bit first)
    row_kind_value = 0
    for i in range(ROW_KIND_BIT_SIZE):
        row_kind_value += int(row_kind_and_null_mask[i]) * 2 ** i

    row = Row(*fields)
    row.set_field_names(self._field_names)
    row.set_row_kind(RowKind(row_kind_value))
    return row
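# Pure-Python sketch (no Flink imports; the mask value is made up) of the
# bit layout this decoder assumes: the first ROW_KIND_BIT_SIZE bits encode
# the row kind, low bit first, and the remaining bits are per-field null
# flags. Bits [False, True] give kind value 2, UPDATE_AFTER in Flink's
# changelog encoding; here the middle of three fields is null.
ROW_KIND_BIT_SIZE = 2

mask = [False, True, False, True, False]
row_kind_value = sum(int(mask[i]) << i for i in range(ROW_KIND_BIT_SIZE))
assert row_kind_value == 2
assert mask[ROW_KIND_BIT_SIZE:] == [False, True, False]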