Exemplo n.º 1
0
    def run_remote(self, object_id, backend_id, operation_name, value):
        """Execute a one-argument operation of an object on a given backend.

        The client for ``backend_id`` is taken from ``self.ready_clients``;
        when it is missing, a new ``EEClient`` is created from the execution
        environment info and cached there. ``value`` is serialized as the
        only parameter of the operation.

        :param object_id: ID of the object on which the operation is executed.
        :param backend_id: key identifying the target backend.
        :param operation_name: name of the operation to execute.
        :param value: the single argument passed to the operation.
        :returns: the deserialized result, or ``None`` when the remote call
            returned ``None``.
        """
        current_session = self.get_session_id()
        impl_id = self.get_implementation_id(object_id, operation_name)

        # Reuse a cached client when there is one; otherwise connect and cache.
        try:
            ee_client = self.ready_clients[backend_id]
        except KeyError:
            ee_info = self.get_execution_environment_info(backend_id)
            ee_client = EEClient(ee_info.hostname, ee_info.port)
            self.ready_clients[backend_id] = ee_client

        op_info = self.get_operation_info(object_id, operation_name)

        # The operation receives exactly one parameter: ``value``.
        single_param = (value,)
        packed_params = SerializationLibUtilsSingleton.serialize_params_or_return(
            params=single_param,
            iface_bitmaps=None,
            params_spec=op_info.params,
            params_order=op_info.paramsOrder,
            hint_volatiles=None,
            runtime=self)

        raw_result = ee_client.ds_execute_implementation(
            object_id, impl_id, current_session, packed_params)
        if raw_result is None:
            return None

        return DeserializationLibUtilsSingleton.deserialize_return(
            raw_result, None, op_info.returnType, self)
Exemplo n.º 2
0
    def update_object(self, into_object, from_object):
        """Send the state of ``from_object`` to overwrite ``into_object``.

        ``from_object`` is serialized the same way volatile parameters are
        (recursively, with ``for_update=True``). Before the request is sent,
        the object IDs of the serialized volatile objects are rewritten: the
        ID of ``from_object`` is replaced by the ID of ``into_object`` and
        every other ID is replaced by a freshly generated UUID. The
        ``tags_to_oids`` metadata of each volatile is patched with the same
        mapping.

        :param into_object: object to update; its location selects the backend.
        :param from_object: object whose serialized state is sent.
        """
        current_session = self.get_session_id()

        target_backend = into_object.get_location()
        # Reuse a cached client when there is one; otherwise connect and cache.
        try:
            ee_client = self.ready_clients[target_backend]
        except KeyError:
            ee_info = self.get_execution_environment_info(target_backend)
            ee_client = EEClient(ee_info.hostname, ee_info.port)
            self.ready_clients[target_backend] = ee_client

        # We serialize objects like volatile parameters
        # TODO: modify serialize_params_or_return to not require this
        packed = SerializationLibUtilsSingleton.serialize_params_or_return(
            params=[from_object],
            iface_bitmaps=None,
            params_spec={"object": "DataClayObject"},  # not used, see serialized_params_or_return
            params_order=["object"],
            hint_volatiles=target_backend,
            runtime=self,
            recursive=True,
            for_update=True)

        volatiles = packed.vol_objs
        if volatiles is not None:
            # Maps each original object ID to the ID it is replaced with.
            oid_remap = {}

            # First pass: rebuild every volatile entry under its new ID.
            for vol_key in volatiles:
                entry = packed.vol_objs[vol_key]
                old_oid = entry.object_id
                if old_oid not in oid_remap:
                    if old_oid == from_object.get_object_id():
                        replacement = into_object.get_object_id()
                    else:
                        replacement = uuid.uuid4()
                    oid_remap[old_oid] = replacement

                packed.vol_objs[vol_key] = ObjectWithDataParamOrReturn(
                    oid_remap[old_oid],
                    entry.class_id,
                    entry.metadata,
                    entry.obj_bytes)

            # Second pass: make the references between volatiles use the new IDs.
            for vol_key in volatiles:
                tag_map = packed.vol_objs[vol_key].metadata.tags_to_oids
                for ref_tag, referenced_oid in tag_map.items():
                    if referenced_oid not in oid_remap:
                        continue
                    try:
                        packed.vol_objs[vol_key].metadata.tags_to_oids[ref_tag] = oid_remap[referenced_oid]
                    except KeyError:
                        pass

        ee_client.ds_update_object(current_session, into_object.get_object_id(), packed)
Exemplo n.º 3
0
    def gc_collect_internal(self, object_to_update):
        """Flush one object to storage on behalf of the garbage collector.

        Depending on the state of ``object_to_update``:

        * pending to register: it is serialized and handed to
          ``self.exec_env.register_and_store_pending``;
        * dirty: it is serialized and sent with ``self.runtime.update_to_sl``;
        * otherwise: it is serialized with ``serialize_for_db_gc_not_dirty``
          and, when that yields bytes, only the extracted reference-counting
          bytes are sent with ``self.runtime.update_to_sl``.

        This is best effort: any ``Exception`` is reported with
        ``traceback.print_exc()`` and swallowed. ``KeyboardInterrupt`` and
        ``SystemExit`` are deliberately not swallowed.

        @postcondition: Update object in db or store it if volatile (and register in LM)
        @param object_to_update: object to update
        """
        try:
            self.logger.debug("[==GCUpdate==] Updating object %s",
                              object_to_update.get_object_id())
            # Call EE update
            if object_to_update.is_pending_to_register():
                self.logger.debug(
                    f"[==GCUpdate==] Storing and registering object {object_to_update.get_object_id()}"
                )
                obj_bytes = SerializationLibUtilsSingleton.serialize_for_db_gc(
                    object_to_update, False, None)
                self.exec_env.register_and_store_pending(
                    object_to_update, obj_bytes, True)
            elif object_to_update.is_dirty():
                self.logger.debug("[==GCUpdate==] Updating dirty object %s ",
                                  object_to_update.get_object_id())
                obj_bytes = SerializationLibUtilsSingleton.serialize_for_db_gc(
                    object_to_update, False, None)
                self.runtime.update_to_sl(object_to_update.get_object_id(),
                                          obj_bytes, True)
            else:
                # TODO: how to check if GlobalGC is enabled?
                # NOTE(review): the message below says "dirty" although this
                # branch is only reached for objects that are NOT dirty.
                self.logger.debug(
                    "[==GCUpdate==] Going to update dirty object in database object with ID %s ",
                    object_to_update.get_object_id())
                obj_bytes = SerializationLibUtilsSingleton.serialize_for_db_gc_not_dirty(
                    object_to_update, False, None, False)
                if obj_bytes is not None:
                    ref_counting_bytes = DeserializationLibUtilsSingleton.extract_reference_counting(
                        obj_bytes)
                    self.runtime.update_to_sl(object_to_update.get_object_id(),
                                              ref_counting_bytes, False)
                else:
                    self.logger.debug(
                        "[==GCUpdate==] %s object is not dirty and have no references. Not going to SL",
                        object_to_update.get_object_id())

        except Exception:
            # Best effort: report the failure and carry on. A bare ``except:``
            # would also swallow KeyboardInterrupt/SystemExit and prevent the
            # process from being interrupted or shut down.
            traceback.print_exc()
        # TODO: set datasetid for GC if set by user
Exemplo n.º 4
0
 def synchronize(self, instance, operation_name, params):
     """Forward a synchronization request for an operation on ``instance``.

     The single value ``params`` is wrapped in a list, serialized according
     to the operation's parameter specification, and passed to
     ``self.execution_environment.synchronize`` together with the session,
     object and implementation IDs.

     :param instance: object on which the operation is synchronized.
     :param operation_name: name of the operation.
     :param params: the single parameter value of the operation.
     """
     current_session = self.get_session_id()
     target_oid = instance.get_object_id()
     op_info = self.get_operation_info(target_oid, operation_name)
     impl_id = self.get_implementation_id(target_oid, operation_name)

     # === SERIALIZE PARAMETERS ===
     packed_params = SerializationLibUtilsSingleton.serialize_params_or_return(
         params=[params],
         iface_bitmaps=None,
         params_spec=op_info.params,
         params_order=op_info.paramsOrder,
         hint_volatiles=instance.get_hint(),
         runtime=self)

     self.execution_environment.synchronize(
         current_session, target_oid, impl_id, packed_params)
Exemplo n.º 5
0
    def synchronize_federated(self, instance, params, operation_name,
                              dc_info_id):
        """Ask an external dataClay to run an operation on a federated object.

        The parameters are serialized once and the request is sent to the
        hosts of the external dataClay in order, stopping at the first host
        that accepts it. A failing host is reported with
        ``traceback.print_exc()`` and the next one is tried.

        :param instance: object on which the operation is run.
        :param params: parameters of the operation, as expected by
            ``serialize_params_or_return``.
        :param operation_name: name of the operation.
        :param dc_info_id: ID of the external dataClay.
        :raises RuntimeError: if the request failed and the last host of the
            external dataClay also failed.
        """
        self.logger.debug(
            "Calling external dataClay to run %s operation in object %s",
            operation_name, instance.get_object_id())
        operation = self.get_operation_info(instance.get_object_id(),
                                            operation_name)
        implementation_id = self.get_implementation_id(
            instance.get_object_id(), operation_name)
        # === SERIALIZE PARAMETERS ===
        # Between DC - DC , ifaceBitMaps = null
        serialized_params = SerializationLibUtilsSingleton.serialize_params_or_return(
            params=params,
            iface_bitmaps=None,
            params_spec=operation.params,
            params_order=operation.paramsOrder,
            hint_volatiles=instance.get_hint(),
            runtime=self)

        dc_info = self.get_external_dataclay_info(dc_info_id)
        hosts = dc_info.hosts
        ports = dc_info.ports
        for i in range(len(hosts)):
            try:
                lm_client = self.get_lm_api(hosts[i], ports[i])
                self.logger.debug(
                    "[==JUMP==] Request execution to external dataClay %s with host %s and port %s for object %s",
                    dc_info_id, hosts[i], ports[i], instance.get_object_id())

                lm_client.synchronize_federated_object(
                    self.get_dataclay_id(), instance.get_object_id(),
                    implementation_id, serialized_params)
            except Exception:
                # Report the failure and fall through to the next host; give
                # up only once the last host has failed too.
                traceback.print_exc()
                if i + 1 == len(hosts):
                    raise RuntimeError(
                        "[dataClay] ERROR: Cannot connect to external dataClay with ID %s"
                        % (str(dc_info_id)))
            else:
                # The request was delivered: do not send it again to the
                # remaining hosts, and do not let a later host's failure turn
                # this success into an error.
                break
Exemplo n.º 6
0
 def synchronize(self, instance, operation_name, params):
     """Send a synchronization request to the backend that holds ``instance``.

     The backend is obtained with ``self.get_location``. Its client is taken
     from ``self.ready_clients``, or created as a new ``EEClient`` and cached
     there when missing. The single value ``params`` is wrapped in a list
     and serialized according to the operation's parameter specification.

     :param instance: object on which the operation is synchronized.
     :param operation_name: name of the operation.
     :param params: the single parameter value of the operation.
     """
     current_session = self.get_session_id()
     target_oid = instance.get_object_id()
     target_backend = self.get_location(target_oid)
     op_info = self.get_operation_info(target_oid, operation_name)
     impl_id = self.get_implementation_id(target_oid, operation_name)

     # === SERIALIZE PARAMETER ===
     packed_params = SerializationLibUtilsSingleton.serialize_params_or_return(
         params=[params],
         iface_bitmaps=None,
         params_spec=op_info.params,
         params_order=op_info.paramsOrder,
         hint_volatiles=instance.get_hint(),
         runtime=self)

     # Reuse a cached client when there is one; otherwise connect and cache.
     try:
         ee_client = self.ready_clients[target_backend]
     except KeyError:
         ee_info = self.get_execution_environment_info(target_backend)
         ee_client = EEClient(ee_info.hostname, ee_info.port)
         self.ready_clients[target_backend] = ee_client

     ee_client.synchronize(current_session, target_oid, impl_id, packed_params)
Exemplo n.º 7
0
    def internal_store(self, instance, make_persistent=True):
        """Perform the storage (StoreObject call) for an instance.

        Starting from ``instance``, every pending object is locked, loaded if
        needed, flagged as persistent and loaded, and serialized. Objects
        discovered during serialization are appended to the pending list by
        ``serialize_dcobj_with_data`` and processed in turn.

        This function works for two main scenarios: the makePersistent one (in
        which the instance is not yet persistent) and the update (in which the
        instance is persistent).

        :param instance: The DataClayObject willing to be stored.
        :param make_persistent: Flag, True when ``ds_store_objects`` should be
            called (after registering the objects through the ``"@LM"``
            client) and False when ``ds_upsert_objects`` is the method to be
            called.
        :return: A dictionary mapping the object ID of every serialized object
            to its class ID. It is the same in both scenarios, but note that
            the update should not use the provided instance for updating
            metadata to the LM.
        :raises DataClayException: if an object without ID is found while
            ``make_persistent`` is False.
        """
        logger.debug("Store procedure for instance %r", instance)
        client = self.ready_clients["@STORAGE"]

        pending_objs = [instance]
        stored_objects_classes = dict()
        serialized_objs = list()
        reg_infos = list()
        dataset_id = self.execution_environment.thread_local_info.dataset_id

        while pending_objs:
            current_obj = pending_objs.pop()
            # Lock and make sure it is loaded
            current_obj_id = current_obj.get_object_id()
            self.lock(current_obj_id)  # Avoid GC clean object while storing it
            try:
                if not current_obj.is_loaded():
                    current_obj = self.get_or_new_instance_from_db(
                        current_obj_id, False)

                dcc_extradata = current_obj.get_class_extradata()
                object_id = current_obj.get_object_id()

                if make_persistent:
                    # Ignore already persistent objects
                    # (the ``finally`` below still releases the lock).
                    if current_obj.is_persistent():
                        continue

                    reg_infos.append([
                        object_id,
                        dcc_extradata.class_id,
                        self.execution_environment.thread_local_info.session_id,
                        dataset_id,
                    ])

                # This object will soon be persistent
                current_obj.set_persistent(True)
                current_obj.set_hint(settings.environment_id)
                # Just in case (should have been loaded already)
                logger.debug(
                    "Setting loaded to true from internal store for object %s",
                    object_id)
                current_obj.set_loaded(True)

                # First store since others OIDs are recursively created while creating MetaData
                if not object_id:
                    if not make_persistent:
                        raise DataClayException(
                            "Objects should never be uuid-less for non-make_persistent use cases"
                        )
                    object_id = uuid.uuid4()
                    current_obj.set_object_id(object_id)
                    current_obj.set_dataset_id(
                        self.execution_environment.thread_local_info.dataset_id)

                logger.debug(
                    "Ready to make persistent object {%s} of class %s {%s}",
                    object_id, dcc_extradata.classname, dcc_extradata.class_id)

                stored_objects_classes[object_id] = dcc_extradata.class_id

                # If we are not in a make_persistent, the dataset_id hint is null (?)
                serialized_objs.append(
                    SerializationLibUtilsSingleton.serialize_dcobj_with_data(
                        current_obj, pending_objs, False, None, self, False))
            finally:
                # Unlock with the ID the lock was taken with, even if the
                # object got a new ID above.
                self.unlock(current_obj_id)

        if make_persistent:
            lm_client = self.ready_clients["@LM"]
            lm_client.register_objects(reg_infos, settings.environment_id,
                                       None, None, LANG_PYTHON)
            client.ds_store_objects(
                self.execution_environment.thread_local_info.session_id,
                serialized_objs, False, None)
        else:
            client.ds_upsert_objects(
                self.execution_environment.thread_local_info.session_id,
                serialized_objs)

        # The mapping was built but never handed back to the caller, although
        # the documented contract promises it.
        return stored_objects_classes
Exemplo n.º 8
0
    def serialize(self, io_file, ignore_user_types, iface_bitmaps,
                  cur_serialized_objs, pending_objs, reference_counting):
        """Write the serialized form of this object into ``io_file``.

        The data is written in this order:

        1. an integer placeholder (``0``) that is overwritten at the end with
           the stream position at which the reference counting data starts;
        2. the master location as a string (``"x"`` when there is none);
        3. the object state: either the pickled result of a user-defined
           ``__getstate__``, or, for every class property sorted by position,
           a boolean presence flag followed by the value when present;
        4. the reference counting data.

        :param io_file: seekable stream to write to; the placeholder is
            rewritten at position 0, so the object is expected to start at
            the beginning of the stream.
        :param ignore_user_types: when true, properties whose type is a
            ``UserType`` are written as absent.
        :param iface_bitmaps: not used by this method.
        :param cur_serialized_objs: forwarded to the association/pickle
            helpers.
        :param pending_objs: forwarded to the association/pickle helpers.
        :param reference_counting: forwarded to the association/pickle
            helpers and used to write the reference counting data.
        """
        # Reference counting information
        # First integer represent the position in the buffer in which
        # reference counting starts. This is done to avoid "holding"
        # unnecessary information during a store or update in disk.

        # in new serialization, this will be done through padding
        # TODO: use padding instead once new serialization is implemented
        IntegerWrapper().write(io_file, 0)

        cur_master_loc = self.get_master_location()
        if cur_master_loc is not None:
            StringWrapper().write(io_file, str(cur_master_loc))
        else:
            StringWrapper().write(io_file, str("x"))

        # Since Python 3.11 ``object`` provides a default ``__getstate__``, so
        # a plain ``hasattr`` check is always true there and the regular
        # property serialization would never run. On such interpreters only a
        # class-level override counts as user-defined; on older interpreters
        # the check is exactly the original ``hasattr``.
        default_getstate = getattr(object, "__getstate__", None)
        has_user_getstate = hasattr(self, "__getstate__") and (
            default_getstate is None
            or getattr(type(self), "__getstate__", None) is not default_getstate)

        if has_user_getstate:
            # The object has a user-defined serialization method.
            # Use that
            dco_extradata = self.__dclay_instance_extradata
            last_loaded_flag = dco_extradata.loaded_flag
            last_persistent_flag = dco_extradata.persistent_flag
            dco_extradata.loaded_flag = True
            dco_extradata.persistent_flag = False

            try:
                # Use pickle to the result of the serialization
                if six.PY2:
                    import cPickle as pickle
                elif six.PY3:
                    import _pickle as pickle

                state = pickle.dumps(self.__getstate__(), protocol=-1)
            finally:
                # Leave the previous value, probably False & True.
                # Done in ``finally`` so a failing ``__getstate__`` or pickle
                # call cannot leave the object with the temporary flags.
                dco_extradata.loaded_flag = last_loaded_flag
                dco_extradata.persistent_flag = last_persistent_flag

            StringWrapper(mode="binary").write(io_file, state)

        else:
            # Regular dataClay provided serialization
            # Get the list of properties, making sure it is sorted
            properties = sorted(self.get_class_extradata().properties.values(),
                                key=attrgetter('position'))

            logger.verbose("Serializing list of properties: %s", properties)

            for p in properties:

                # Bypass any attribute interception and read the raw storage
                # slot; a property that was never set counts as None.
                try:
                    value = object.__getattribute__(
                        self, "%s%s" % (DCLAY_PROPERTY_PREFIX, p.name))
                except AttributeError:
                    value = None

                logger.verbose("Serializing property %s with value %s ",
                               p.name, value)

                if value is None:
                    BooleanWrapper().write(io_file, False)
                else:
                    if isinstance(p.type, UserType):
                        if not ignore_user_types:
                            BooleanWrapper().write(io_file, True)
                            SerializationLibUtilsSingleton.serialize_association(
                                io_file, value, cur_serialized_objs,
                                pending_objs, reference_counting)
                        else:
                            BooleanWrapper().write(io_file, False)
                    else:
                        BooleanWrapper().write(io_file, True)
                        pck = Pickler(io_file, protocol=-1)
                        pck.persistent_id = PersistentIdPicklerHelper(
                            cur_serialized_objs, pending_objs,
                            reference_counting)
                        pck.dump(value)

        # Reference counting
        # TODO: this should be removed in new serialization
        # TODO: (by using paddings to directly access reference counters inside metadata)

        # Back-patch the placeholder written first with the position at which
        # the reference counting data starts, then continue at that position.
        cur_stream_pos = io_file.tell()
        io_file.seek(0)
        IntegerWrapper().write(io_file, cur_stream_pos)
        io_file.seek(cur_stream_pos)
        reference_counting.serialize_reference_counting(
            self.get_object_id(), io_file)
Exemplo n.º 9
0
    def call_execute_to_ds(self, instance, parameters, operation_name, exeenv_id, using_hint):
        """Execute an operation of ``instance`` in an execution environment, with retries.

        The parameters are serialized once. The remote call is then attempted
        up to ``Configuration.MAX_EXECUTION_RETRIES`` times. When an attempt
        fails with ``DataClayException`` or ``RpcError``:

        * if one of the persistent references among the parameters is a
          volatile that is currently being sent, the call is simply retried
          in the same execution environment;
        * otherwise it counts as a miss: the locations of the object are
          looked up (replica locations from the heap first, metadata as a
          fallback) and the next attempt targets a location different from
          the current one when there is any.

        :param instance: object on which the operation is executed.
        :param parameters: parameters of the operation, as expected by
            ``serialize_params_or_return``.
        :param operation_name: name of the operation.
        :param exeenv_id: ID of the execution environment tried first; it is
            also used as the hint for volatile parameters.
        :param using_hint: when true, the hint of ``instance`` is updated to
            the newly chosen execution environment after a miss.
        :returns: the deserialized result, or ``None`` when the remote call
            returned ``None``.
        :raises RuntimeError: if no attempt succeeded.
        :raises DataClayException, RpcError: the error of the failed attempt,
            re-raised when no location of the object can be found.
        """
        object_id = instance.get_object_id()
        operation = self.get_operation_info(object_id, operation_name)
        session_id = self.get_session_id()
        implementation_id = self.get_implementation_id(object_id, operation_name)

        # // === SERIALIZE PARAMETERS === //
        serialized_params = SerializationLibUtilsSingleton.serialize_params_or_return(
            params=parameters,
            iface_bitmaps=None,
            params_spec=operation.params,
            params_order=operation.paramsOrder,
            hint_volatiles=exeenv_id,
            runtime=self)

        # Remember which volatiles are in flight so that a failure caused by
        # one of them not having arrived yet can be told apart from a miss.
        if serialized_params is not None and serialized_params.vol_objs is not None:
            for param in serialized_params.vol_objs.values():
                self.volatile_parameters_being_send.add(param.object_id)

        # // === EXECUTE === //
        max_retry = Configuration.MAX_EXECUTION_RETRIES
        num_misses = 0
        executed = False
        ret = None
        for _ in range(max_retry):
            try:
                self.logger.verbose("Obtaining API for remote execution in %s ", exeenv_id)
                execution_client = self.ready_clients[exeenv_id]
            except KeyError:
                exeenv = self.get_execution_environment_info(exeenv_id)
                self.logger.debug("Not found in cache ExecutionEnvironment {%s}! Starting it at %s:%d",
                                  exeenv_id, exeenv.hostname, exeenv.port)
                execution_client = EEClient(exeenv.hostname, exeenv.port)
                self.ready_clients[exeenv_id] = execution_client

            try:
                self.logger.verbose("Calling remote EE %s ", exeenv_id)
                ret = execution_client.ds_execute_implementation(
                    object_id,
                    implementation_id,
                    session_id,
                    serialized_params)
                executed = True
                break

            except (DataClayException, RpcError) as dce:
                self.logger.warning("Execution resulted in an error, retrying...", exc_info=dce)

                is_race_condition = False
                if serialized_params is not None and serialized_params.persistent_refs is not None:
                    for param in serialized_params.persistent_refs:
                        if param.object_id in self.volatile_parameters_being_send:
                            is_race_condition = True
                            break
                if not is_race_condition:
                    num_misses += 1
                    self.logger.debug("Exception dataclay during execution. Retrying...")
                    self.logger.debug(str(dce))

                    locations = self.get_from_heap(object_id).get_replica_locations()
                    if not locations:
                        try:
                            locations = self.get_metadata(object_id).locations
                        except DataClayException:
                            locations = None

                    # An empty collection is as useless as no metadata at
                    # all: there is nothing to pick the next location from.
                    if not locations:
                        self.logger.warning("Execution failed and no metadata available. Cannot continue")
                        # NOTE(review): the volatile IDs registered above stay
                        # in ``volatile_parameters_being_send`` on this path.
                        raise

                    # Must be reset on every miss: it was previously assigned
                    # only on the metadata path, so it could be unbound (or
                    # stale from an earlier retry) when the replica locations
                    # came from the heap.
                    new_location = False
                    for loc in locations:
                        self.logger.debug("Found location %s", loc)
                        if loc != exeenv_id:
                            exeenv_id = loc
                            self.logger.debug("Found different location %s", loc)
                            new_location = True
                            break

                    if not new_location:
                        exeenv_id = next(iter(locations))
                    if using_hint:
                        instance.set_hint(exeenv_id)
                    self.logger.debug(
                        "[==Miss Jump==] MISS. The object %s was not in the exec.location %s. Retrying execution.",
                        instance.get_object_id(), exeenv_id)

        if serialized_params is not None and serialized_params.vol_objs is not None:
            for param in serialized_params.vol_objs.values():
                if num_misses > 0:
                    #===========================================================
                    # if there was a miss, it means that the persistent object in which we were executing
                    # was not in the choosen location. As you can see in the serialize parameters function above
                    # we provide the execution environment as hint to set to volatile parameters. In EE, before
                    # deserialization of volatiles we check if the persistent object in which to execute a method is
                    # there, if not, EE raises and exception. Therefore, if there was a miss, we know that the
                    # hint we set in volatile parameters is wrong, because they are going to be deserialized/stored
                    # in the same location as the object with the method to execute
                    #===========================================================
                    param_instance = self.get_from_heap(param.object_id)
                    param_instance.set_hint(exeenv_id)
                self.volatile_parameters_being_send.remove(param.object_id)

        if not executed:
            raise RuntimeError("[dataClay] ERROR: Trying to execute remotely object  but something went wrong. "
                               "Maybe the object is still not stored (in case of asynchronous makepersistent) and "
                               "waiting time is not enough. Maybe the object does not exist anymore due to a remove. "
                               "Or Maybe an exception happened in the server and the call failed.")

        result = None
        if ret is None:
            self.logger.debug(f"Result of operation named {operation_name} received: None")
        else:
            self.logger.debug(f"Deserializing result of operation named {operation_name}, return type is {operation.returnType.signature}")
            result = DeserializationLibUtilsSingleton.deserialize_return(ret, None, operation.returnType, self)
            self.logger.debug(f"Deserialization of result of operation named {operation_name} successfully finished.")
        return result
Exemplo n.º 10
0
    def make_persistent(self, instance, alias, backend_id, recursive):
        """Make ``instance`` persistent in a backend and cache its metadata.

        The location is the hint of the instance when it has one; otherwise
        ``backend_id``; otherwise whatever ``self.choose_location`` returns.

        When the instance is not persistent yet:

        * if ``alias`` is given, the object is registered through the
          ``"@LM"`` client, the instance takes the returned object ID and
          the alias cache is updated;
        * the instance is serialized like a volatile parameter and sent to
          the execution environment of the chosen location;
        * the hint of the instance is set to that location.

        In every case a ``MetaDataInfo`` entry for the object is stored in
        ``self.metadata_cache``.

        This function is called from a stub/execution class.

        :param instance: Instance to make persistent
        :param alias: Alias for the object
        :param backend_id: Indicates which is the destination backend
        :param recursive: Indicates if make persistent is recursive
        :returns: ID of the backend in which the object was persisted.
        :type instance: DataClayObject
        :type backend_id: DataClayID
        :type recursive: boolean
        :type alias: string
        :rtype: DataClayID
        :raises RuntimeError: if backend id is UNDEFINED_LOCAL.
        """

        self.logger.debug(
            "Starting make persistent object for instance with id %s",
            instance.get_object_id())
        if backend_id is UNDEFINED_LOCAL:
            # This is a commonruntime end user pitfall,
            # @abarcelo thinks that it is nice
            # (and exceptionally detailed) error
            raise RuntimeError("""
                You are trying to use dataclay.api.LOCAL but either:
                  - dataClay has not been initialized properly
                  - LOCAL has been wrongly imported.
                
                Be sure to use LOCAL with:
                
                from dataclay import api
                
                and reference it with `api.LOCAL`
                
                Refusing the temptation to guess.""")
        location = instance.get_hint()
        if location is None:
            location = backend_id
            # Choose location if needed
            # If object is already persistent -> it must have a Hint (location = hint here)
            # If object is not persistent -> location is choosen (provided backend id or random, hash...).
            if location is None:
                location = self.choose_location(instance, alias)

        if not instance.is_persistent():
            if alias is not None:
                # Add a new alias to an object.
                # Use cases:
                # 1 - object was persisted without alias and not yet registered -> we need to register it with new alias.
                # 2 - object was persisted and it is already registered -> we only add a new alias
                # 3 - object was persisted with an alias and it must be already registered -> we add a new alias.

                # From client side, we cannot check if object is registered or not (we do not have isPendingToRegister like EE)
                # Therefore, we call LogicModule with all information for registration.
                reg_info = RegistrationInfo(
                    instance.get_object_id(),
                    instance.get_class_extradata().class_id,
                    self.get_session_id(), instance.get_dataset_id(), alias)
                reg_infos = [reg_info]
                new_object_ids = self.ready_clients["@LM"].register_objects(
                    reg_infos, location, LANG_PYTHON)
                self.logger.debug(f"Received ids: {new_object_ids}")
                new_object_id = next(iter(new_object_ids))
                self.update_object_id(instance, new_object_id)

                # Cached value is the tuple (object ID, class ID, location).
                self.alias_cache[alias] = (
                    instance.get_object_id(),
                    instance.get_class_extradata().class_id,
                    location,
                )

            # === MAKE PERSISTENT === #
            self.logger.debug("Instance with object ID %s being send to EE",
                              instance.get_object_id())
            # set the default master location
            instance.set_master_location(location)
            instance.set_alias(alias)
            # We serialize objects like volatile parameters
            parameters = [instance]
            params_order = ["object"]
            params_spec = {"object": "DataClayObject"}  # not used, see serialized_params_or_return
            serialized_objs = SerializationLibUtilsSingleton.serialize_params_or_return(
                params=parameters,
                iface_bitmaps=None,
                params_spec=params_spec,
                params_order=params_order,
                hint_volatiles=location,
                runtime=self,
                recursive=recursive)

            # Avoid some race-conditions in communication (make persistent + execute where
            # execute arrives before).
            # TODO: fix volatiles under deserialization support for __setstate__ and __getstate__
            self.add_volatiles_under_deserialization(
                serialized_objs.vol_objs.values())
            try:
                # Get EE
                try:
                    execution_client = self.ready_clients[location]
                except KeyError:
                    exeenv = self.get_execution_environment_info(location)
                    self.logger.debug(
                        "Not found in cache ExecutionEnvironment {%s}! Starting it at %s:%d",
                        location, exeenv.hostname, exeenv.port)
                    execution_client = EEClient(exeenv.hostname, exeenv.port)
                    self.ready_clients[location] = execution_client

                # Call EE
                self.logger.verbose("Calling make persistent to EE %s ", location)
                execution_client.make_persistent(settings.current_session_id,
                                                 serialized_objs.vol_objs.values())

                # update the hint with the location, and return it
                instance.set_hint(location)
            finally:
                # remove volatiles under deserialization, also when the call
                # above failed: otherwise they would stay marked as being in
                # flight after the error.
                self.remove_volatiles_under_deserialization(
                    serialized_objs.vol_objs.values())

        object_id = instance.get_object_id()
        locations = {location}
        metadata_info = MetaDataInfo(object_id, False,
                                     instance.get_dataset_id(),
                                     instance.get_class_extradata().class_id,
                                     locations, alias, None)
        self.metadata_cache[object_id] = metadata_info
        return location