def retrieve_object_from_cache(logger, cache_ids, cache_queue, identifier,
                               parameter_name, user_function, cache_profiler):  # noqa
    # type: (typing.Any, typing.Any, Queue, str, str, typing.Callable, bool) -> typing.Any
    """ Retrieve an object from the given cache proxy dict.

    :param logger: Logger where to push messages.
    :param cache_ids: Cache proxy dictionary.
    :param cache_queue: Cache notification queue.
    :param identifier: Object identifier.
    :param parameter_name: Parameter name.
    :param user_function: Function name.
    :param cache_profiler: If cache profiling is enabled.
    :return: The object from cache and its shared memory block.
    """
    with event_inside_worker(RETRIEVE_OBJECT_FROM_CACHE_EVENT):
        emit_manual_event_explicit(BINDING_DESERIALIZATION_CACHE_SIZE_TYPE, 0)
        identifier = __get_file_name__(identifier)
        if __debug__:
            logger.debug(HEADER + "Retrieving: " + str(identifier))
        obj_id, obj_shape, obj_d_type, _, obj_hits, shared_type = cache_ids[identifier]  # noqa: E501
        output = None         # type: typing.Any
        existing_shm = None   # type: typing.Any
        object_size = 0
        if shared_type == SHARED_MEMORY_TAG:
            existing_shm = SharedMemory(name=obj_id)
            output = np.ndarray(obj_shape, dtype=obj_d_type, buffer=existing_shm.buf)  # noqa: E501
            object_size = len(existing_shm.buf)
        elif shared_type == SHAREABLE_LIST_TAG:
            existing_shm = ShareableList(name=obj_id)
            output = list(existing_shm)
            object_size = len(existing_shm.shm.buf)
        elif shared_type == SHAREABLE_TUPLE_TAG:
            existing_shm = ShareableList(name=obj_id)
            output = tuple(existing_shm)
            object_size = len(existing_shm.shm.buf)
        # Currently unsupported since conversion requires lists of lists.
        # elif shared_type == SHAREABLE_DICT_TAG:
        #     existing_shm = ShareableList(name=obj_id)
        #     output = dict(existing_shm)
        else:
            raise PyCOMPSsException("Unknown cacheable type.")
        if __debug__:
            logger.debug(HEADER + "Retrieved: " + str(identifier))
        emit_manual_event_explicit(BINDING_DESERIALIZATION_CACHE_SIZE_TYPE,
                                   object_size)
        # Profiling
        filename = filename_cleaned(identifier)
        function_name = function_cleaned(user_function)
        if cache_profiler:
            cache_queue.put(("GET", (filename, parameter_name, function_name)))
        # Add hit
        cache_ids[identifier][4] = obj_hits + 1
        return output, existing_shm
def retrieve_object_from_cache(logger, cache_ids, identifier):  # noqa
    # type: (..., ..., str) -> ...
    """ Retrieve an object from the given cache proxy dict.

    :param logger: Logger where to push messages.
    :param cache_ids: Cache proxy dictionary.
    :param identifier: Object identifier.
    :return: The object from cache.
    """
    emit_manual_event_explicit(TASK_EVENTS_DESERIALIZE_SIZE_CACHE, 0)
    identifier = __get_file_name__(identifier)
    if __debug__:
        logger.debug(HEADER + "Retrieving: " + str(identifier))
    obj_id, obj_shape, obj_d_type, _, obj_hits, shared_type = cache_ids[identifier]  # noqa: E501
    size = 0
    if shared_type == SHARED_MEMORY_TAG:
        existing_shm = SharedMemory(name=obj_id)
        size = len(existing_shm.buf)
        output = np.ndarray(obj_shape, dtype=obj_d_type, buffer=existing_shm.buf)  # noqa: E501
    elif shared_type == SHAREABLE_LIST_TAG:
        existing_shm = ShareableList(name=obj_id)
        size = len(existing_shm.shm.buf)
        output = list(existing_shm)
    elif shared_type == SHAREABLE_TUPLE_TAG:
        existing_shm = ShareableList(name=obj_id)
        size = len(existing_shm.shm.buf)
        output = tuple(existing_shm)
    # Currently unsupported since conversion requires lists of lists.
    # elif shared_type == SHAREABLE_DICT_TAG:
    #     existing_shm = ShareableList(name=obj_id)
    #     output = dict(existing_shm)
    else:
        raise PyCOMPSsException("Unknown cacheable type.")
    if __debug__:
        logger.debug(HEADER + "Retrieved: " + str(identifier))
    emit_manual_event_explicit(TASK_EVENTS_DESERIALIZE_SIZE_CACHE, size)
    cache_ids[identifier][4] = obj_hits + 1
    return output, existing_shm
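
# Hedged usage sketch (illustrative, not part of the binding): calling the
# three-argument variant defined just above for an object that was published
# as a SharedMemory-backed ndarray. The helper name and parameter names are
# hypothetical; cache_ids is assumed to be the proxy dict filled in by the
# cache tracker, and the returned segment is only detached here, never
# unlinked.
def _example_cache_get(cache_ids, file_name):
    # type: (typing.Any, str) -> float
    """ Illustrative only: fetch a cached ndarray and detach afterwards. """
    import logging
    logger = logging.getLogger(__name__)
    obj, shm = retrieve_object_from_cache(logger, cache_ids, file_name)
    total = float(obj.sum())  # consume the zero-copy view while shm is open
    shm.close()               # detach only; unlink() is the segment owner's job
    return total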
def insert_object_into_cache(logger, cache_queue, obj, f_name, parameter,
                             user_function):  # noqa
    # type: (typing.Any, Queue, typing.Any, str, str, typing.Callable) -> None
    """ Put an object into cache.

    :param logger: Logger where to push messages.
    :param cache_queue: Cache notification queue.
    :param obj: Object to store.
    :param f_name: File name that corresponds to the object (used as id).
    :param parameter: Parameter name.
    :param user_function: Function.
    :return: None
    """
    with event_inside_worker(INSERT_OBJECT_INTO_CACHE_EVENT):
        function = function_cleaned(user_function)
        f_name = __get_file_name__(f_name)
        if __debug__:
            logger.debug(HEADER + "Inserting into cache (%s): %s" %
                         (str(type(obj)), str(f_name)))
        try:
            inserted = True
            if isinstance(obj, np.ndarray):
                emit_manual_event_explicit(
                    BINDING_SERIALIZATION_CACHE_SIZE_TYPE, 0)
                shape = obj.shape
                d_type = obj.dtype
                size = obj.nbytes
                shm = SHARED_MEMORY_MANAGER.SharedMemory(size=size)  # noqa
                within_cache = np.ndarray(shape, dtype=d_type, buffer=shm.buf)
                within_cache[:] = obj[:]  # Copy contents
                new_cache_id = shm.name
                cache_queue.put(("PUT", (f_name, new_cache_id, shape, d_type, size, SHARED_MEMORY_TAG, parameter, function)))  # noqa: E501
            elif isinstance(obj, list):
                emit_manual_event_explicit(
                    BINDING_SERIALIZATION_CACHE_SIZE_TYPE, 0)
                sl = SHARED_MEMORY_MANAGER.ShareableList(obj)  # noqa
                new_cache_id = sl.shm.name
                size = total_sizeof(obj)
                cache_queue.put(("PUT", (f_name, new_cache_id, 0, 0, size, SHAREABLE_LIST_TAG, parameter, function)))  # noqa: E501
            elif isinstance(obj, tuple):
                emit_manual_event_explicit(
                    BINDING_SERIALIZATION_CACHE_SIZE_TYPE, 0)
                sl = SHARED_MEMORY_MANAGER.ShareableList(obj)  # noqa
                new_cache_id = sl.shm.name
                size = total_sizeof(obj)
                cache_queue.put(("PUT", (f_name, new_cache_id, 0, 0, size, SHAREABLE_TUPLE_TAG, parameter, function)))  # noqa: E501
            # Unsupported dicts since they are lists of lists when converted.
            # elif isinstance(obj, dict):
            #     # Convert dict to list of tuples
            #     list_tuples = list(zip(obj.keys(), obj.values()))
            #     sl = SHARED_MEMORY_MANAGER.ShareableList(list_tuples)  # noqa
            #     new_cache_id = sl.shm.name
            #     size = total_sizeof(obj)
            #     cache_queue.put(("PUT", (f_name, new_cache_id, 0, 0, size, SHAREABLE_DICT_TAG, parameter, function)))  # noqa: E501
            else:
                inserted = False
                if __debug__:
                    logger.debug(
                        HEADER +
                        "Can not put into cache: Not a [np.ndarray | list | tuple ] object")  # noqa: E501
            if inserted:
                emit_manual_event_explicit(
                    BINDING_SERIALIZATION_CACHE_SIZE_TYPE, size)
            if __debug__ and inserted:
                logger.debug(HEADER + "Inserted into cache: " + str(f_name) + " as " + str(new_cache_id))  # noqa: E501
        except KeyError as e:  # noqa
            if __debug__:
                logger.debug(
                    HEADER +
                    "Can not put into cache. It may be a [np.ndarray | list | tuple ] object containing an unsupported type")  # noqa: E501
                logger.debug(str(e))
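
# Hedged sketch of the tracker side (an assumption, not shown in this file):
# a minimal consumer of the ("PUT", ...) messages queued above, storing the
# [obj_id, shape, dtype, size, hits, tag] entry that the retrieval functions
# unpack. The message field order follows the producers above; the loop
# structure, helper name and "QUIT" action are illustrative only.
def _example_tracker_put_loop(cache_queue, cache_ids):
    # type: (Queue, typing.Any) -> None
    """ Illustrative only: turn PUT messages into cache_ids entries. """
    while True:
        action, message = cache_queue.get()
        if action == "QUIT":  # hypothetical shutdown message
            break
        if action == "PUT":
            f_name, obj_id, shape, d_type, size, tag, parameter, function = message  # noqa: E501
            # New entries start with zero hits; retrievals increment index 4.
            cache_ids[f_name] = [obj_id, shape, d_type, size, 0, tag]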
def deserialize_from_handler(handler, show_exception=True):
    # type: (..., bool) -> object
    """ Deserialize an object from a file.

    :param handler: Handler of the file from where the object is going to be
                    deserialized.
    :param show_exception: Show the exception if it happens (only with debug).
    :return: The object and if the handler has to be closed.
    :raises SerializerException: If deserialization can not be done.
    """
    # Retrieve the used library (if possible)
    emit_manual_event_explicit(DESERIALIZATION_SIZE_EVENTS, 0)
    if hasattr(handler, 'name'):
        emit_manual_event_explicit(
            DESERIALIZATION_OBJECT_NUM,
            (abs(hash(os.path.basename(handler.name))) % platform_c_maxint))
    original_position = None
    try:
        original_position = handler.tell()
        serializer = IDX2LIB[int(handler.read(4))]
    except KeyError:
        # The first 4 bytes return a value that is not within IDX2LIB
        handler.seek(original_position)
        error_message = 'Handler does not refer to a valid PyCOMPSs object'
        raise SerializerException(error_message)
    close_handler = True
    try:
        if DISABLE_GC:
            # Disable the garbage collector while serializing -> performance?
            gc.disable()
        if serializer is numpy and NUMPY_AVAILABLE:
            ret = serializer.load(handler, allow_pickle=False)
        elif serializer is pyarrow and PYARROW_AVAILABLE:
            ret = pyarrow.ipc.open_file(handler)
            if isinstance(ret, pyarrow.ipc.RecordBatchFileReader):
                close_handler = False
        else:
            ret = serializer.load(handler)
        # Special case: deserialized obj wraps a generator
        if isinstance(ret, tuple) and \
                ret and \
                isinstance(ret[0], GeneratorIndicator):
            ret = convert_to_generator(ret[1])
        if DISABLE_GC:
            # Enable the garbage collector and force to clean the memory
            gc.enable()
            gc.collect()
        emit_manual_event_explicit(DESERIALIZATION_SIZE_EVENTS, handler.tell())
        emit_manual_event_explicit(DESERIALIZATION_OBJECT_NUM, 0)
        return ret, close_handler
    except Exception:
        if DISABLE_GC:
            gc.enable()
        if __debug__ and show_exception:
            print('ERROR! Deserialization with %s failed.' % str(serializer))
            try:
                traceback.print_exc()
            except AttributeError:
                # Bug fixed in 3.5 - issue10805
                pass
        raise SerializerException('Cannot deserialize object')
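
# Hedged sketch (illustrative, not part of the module): the framing that
# deserialize_from_handler relies on is a 4-byte, zero-padded decimal index
# written by serialize_to_handler below ('%04d' % LIB2IDX[serializer]),
# followed by the payload. Peeking at that index identifies the serializer
# without touching the body. IDX2LIB is assumed to be the inverse mapping of
# LIB2IDX; the helper name is hypothetical.
def _example_peek_serializer(handler):
    # type: (typing.Any) -> typing.Any
    """ Illustrative only: return the serializer module a handler points to. """
    position = handler.tell()
    try:
        return IDX2LIB[int(handler.read(4))]
    finally:
        handler.seek(position)  # leave the handler where it was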
def serialize_to_handler(obj, handler):
    # type: (object, ...) -> None
    """ Serialize an object to a handler.

    :param obj: Object to be serialized.
    :param handler: A handler object. It must implement methods like write,
                    writeline and similar stuff.
    :return: None
    :raises SerializerException: If something wrong happens during
                                 serialization.
    """
    emit_manual_event_explicit(SERIALIZATION_SIZE_EVENTS, 0)
    if hasattr(handler, 'name'):
        emit_manual_event_explicit(
            SERIALIZATION_OBJECT_NUM,
            (abs(hash(os.path.basename(handler.name))) % platform_c_maxint))
    if DISABLE_GC:
        # Disable the garbage collector while serializing -> more performance?
        gc.disable()
    # Get the serializer priority
    serializer_priority = get_serializer_priority(obj)
    i = 0
    success = False
    original_position = handler.tell()
    # Let's try the serializers in the given priority
    while i < len(serializer_priority) and not success:
        # Reset the handler's pointer to the first position
        handler.seek(original_position)
        serializer = serializer_priority[i]
        handler.write(bytearray('%04d' % LIB2IDX[serializer], 'utf8'))
        # Special case: obj is a generator
        if isinstance(obj, types.GeneratorType):
            try:
                pickle_generator(obj, handler, serializer)
                success = True
            except Exception:  # noqa
                if __debug__:
                    traceback.print_exc()
        # General case
        else:
            try:
                # If it is a numpy object then use its saving mechanism
                if serializer is numpy and \
                        NUMPY_AVAILABLE and \
                        (isinstance(obj, numpy.ndarray) or
                         isinstance(obj, numpy.matrix)):
                    serializer.save(handler, obj, allow_pickle=False)
                elif serializer is pyarrow and \
                        PYARROW_AVAILABLE and \
                        object_belongs_to_module(obj, "pyarrow"):
                    writer = pyarrow.ipc.new_file(handler, obj.schema)  # noqa
                    writer.write(obj)
                    writer.close()
                else:
                    serializer.dump(obj, handler,
                                    protocol=serializer.HIGHEST_PROTOCOL)
                success = True
            except Exception:  # noqa
                success = False
        i += 1
    emit_manual_event_explicit(SERIALIZATION_SIZE_EVENTS, handler.tell())
    emit_manual_event_explicit(SERIALIZATION_OBJECT_NUM, 0)
    if DISABLE_GC:
        # Enable the garbage collector and force to clean the memory
        gc.enable()
        gc.collect()
    # If success is still False then all the serializers have failed
    if not success:
        try:
            traceback.print_exc()
        except AttributeError:
            # Bug fixed in 3.5 - issue10805
            pass
        raise SerializerException('Cannot serialize object %s' % obj)
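
# Hedged round-trip sketch (illustrative): serialize an object with the
# function above and read it back with deserialize_from_handler. It only
# relies on the handler exposing read/write/seek/tell, as a regular binary
# file does; the helper name and the path argument are hypothetical.
def _example_round_trip(obj, path):
    # type: (object, str) -> object
    """ Illustrative only: write obj to path and load it back. """
    with open(path, "wb") as out_handler:
        serialize_to_handler(obj, out_handler)
    in_handler = open(path, "rb")
    result, close_handler = deserialize_from_handler(in_handler)
    if close_handler:
        # pyarrow readers keep reading lazily, so the handler must stay open
        # when close_handler is False; otherwise it is safe to close here.
        in_handler.close()
    return result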