def deserialize_from_bytes(self, data_frame, column_serializers):
    """
    Deserialize all cells in the provided data frame from a bytes representation (inplace).

    A cell may hold a single serialized value or a list / set of serialized values. Lists and sets are
    replaced by a new collection of the same kind whose elements were deserialized one by one. Cells and
    collection elements that are a numpy.float64 NaN or falsy (e.g. None, empty bytes, an empty
    collection) are written back as None without calling the deserializer.

    @param data_frame a pandas.DataFrame containing columns to deserialize
    @param column_serializers dict containing column names present in data_frame as keys and deserializer_ids as
                              values. A deserializer_id should be the id of the java extension point on which the
                              deserializer is registered. Each column identified by the dict keys is deserialized
                              using the deserializer provided by the TypeExtensionManager for the given
                              deserializer_id.
    """

    def is_missing(candidate):
        # NaN is truthy, so it needs its own check; anything else counts as missing when it is falsy.
        if isinstance(candidate, numpy.float64) and numpy.isnan(candidate):
            return True
        return not candidate

    def deserialize_or_none(deserializer, candidate):
        # Missing entries stay None instead of being handed to the deserializer.
        return None if is_missing(candidate) else deserializer.deserialize(candidate)

    row_count = len(data_frame)
    for column, deserializer_id in column_serializers.items():
        deserializer = self._type_extension_manager.get_deserializer_by_id(deserializer_id)
        if row_count == 0:
            # No cells to rewrite, so the column itself is never looked up.
            continue
        # Cell writes do not move columns around: resolve the position once per column, not once per row.
        col_idx = data_frame.columns.get_loc(column)
        for i in range(row_count):
            if debug_util.is_debug_enabled():
                percent_done = i * 100 / row_count
                if percent_done % 5 == 0:
                    debug_util.debug_msg(str(percent_done) + ' percent done (deserialize)')
            # Using bracket accessor is necessary here for ensuring that there are no unwanted type conversions.
            value = data_frame[column][data_frame.index[i]]
            if is_missing(value):
                data_frame.iat[i, col_idx] = None
            elif isinstance(value, list):
                data_frame.iat[i, col_idx] = [
                    deserialize_or_none(deserializer, inner_value) for inner_value in value
                ]
            elif isinstance(value, set):
                data_frame.iat[i, col_idx] = {
                    deserialize_or_none(deserializer, inner_value) for inner_value in value
                }
            else:
                data_frame.iat[i, col_idx] = deserializer.deserialize(value)
def serialize_objects_to_bytes(self, data_frame, column_serializers):
    """
    Serialize all cells in the provided data frame to a bytes representation (inplace).

    Every listed column is first converted to dtype 'object' (if it is not already) so that it can hold
    the serialized values. A cell holding a list or a set is replaced by a new list / set whose elements
    were serialized one by one; None cells and None elements are left as None.

    @param data_frame a pandas.DataFrame containing columns to serialize
    @param column_serializers dict containing column names present in data_frame as keys and serializer_ids as
                              values. A serializer_id should be the id of the java extension point on which the
                              serializer is registered. Each column identified by the dict keys is serialized
                              using the serializer provided by the TypeExtensionManager for the given
                              serializer_id.
    """

    def serialize_or_none(serializer, element):
        # None marks a missing element and is passed through untouched.
        return element if element is None else serializer.serialize(element)

    for column in column_serializers:
        serializer_id = column_serializers[column]
        serializer = self._type_extension_manager.get_serializer_by_id(serializer_id)
        col_idx = data_frame.columns.get_loc(column)
        if data_frame[column].dtype != 'object':
            data_frame[column] = data_frame[column].astype('object')

        total_rows = len(data_frame)
        for row in range(total_rows):
            if debug_util.is_debug_enabled():
                percent_done = row * 100 / total_rows
                if percent_done % 5 == 0:
                    debug_util.debug_msg(str(percent_done) + ' percent done (serialize)')

            # Using bracket accessor is necessary here for ensuring that there are
            # no unwanted type conversions
            value = data_frame[column][data_frame.index[row]]
            if value is None:
                continue

            if isinstance(value, list):
                serialized = [serialize_or_none(serializer, item) for item in value]
            elif isinstance(value, set):
                serialized = {serialize_or_none(serializer, item) for item in value}
            else:
                serialized = serializer.serialize(value)
            data_frame.iat[row, col_idx] = serialized