Пример #1
0
    def run_remote(self, object_id, backend_id, operation_name, value):
        """Execute an operation of an object on a specific backend.

        ``value`` is serialized as the only parameter of the operation and the
        call is sent to the execution environment identified by ``backend_id``.

        :param object_id: ID of the object on which the operation is executed
        :param backend_id: ID of the backend that must run the operation
        :param operation_name: name of the operation to execute
        :param value: single argument passed to the operation
        :return: deserialized result, or None when the remote call returned None
        """
        session = self.get_session_id()
        impl_id = self.get_implementation_id(object_id, operation_name)

        # Reuse the cached client of the backend; create and cache it on a miss.
        try:
            client = self.ready_clients[backend_id]
        except KeyError:
            env_info = self.get_execution_environment_info(backend_id)
            client = EEClient(env_info.hostname, env_info.port)
            self.ready_clients[backend_id] = client

        op_info = self.get_operation_info(object_id, operation_name)
        payload = SerializationLibUtilsSingleton.serialize_params_or_return(
            params=(value,),
            iface_bitmaps=None,
            params_spec=op_info.params,
            params_order=op_info.paramsOrder,
            hint_volatiles=None,
            runtime=self,
        )

        raw_result = client.ds_execute_implementation(
            object_id, impl_id, session, payload)
        if raw_result is None:
            return None
        return DeserializationLibUtilsSingleton.deserialize_return(
            raw_result, None, op_info.returnType, self)
Пример #2
0
    def start_autoregister(self, local_ip):
        """Start the autoregister procedure to introduce ourselves to the LogicModule.

        The execution environment is registered through the ``"@LM"`` client. The
        first attempt is followed by at most ``Configuration.MAX_RETRY_AUTOREGISTER``
        retries; once they are exhausted the last exception is re-raised.
        On success the returned storage location ID and the execution environment
        ID are stored in ``settings``, a client to the storage location is cached
        under ``"@STORAGE"`` and the LogicModule client is additionally cached
        under the dataClay instance ID.

        :param local_ip: IP address announced to the LogicModule
        :raises Exception: whatever the last registration attempt raised
        """
        self.execution_environment.prepareThread()

        logger.info("Start Autoregister with %s local_ip", local_ip)
        lm_client = self.execution_environment.get_runtime().ready_clients["@LM"]

        max_retries = Configuration.MAX_RETRY_AUTOREGISTER
        # Presumably configured in milliseconds (time.sleep expects seconds) -- TODO confirm
        sleep_time = Configuration.RETRY_AUTOREGISTER_TIME / 1000
        execution_environment_id = self.execution_environment.get_execution_environment_id()

        retries = 0
        while True:
            try:
                storage_location_id = lm_client.autoregister_ee(execution_environment_id,
                    settings.dataservice_name,
                    local_ip,
                    settings.dataservice_port,
                    LANG_PYTHON)
            except Exception as e:
                logger.debug("Catched exception of type %s. Message:\n%s", type(e), e)
                if retries >= max_retries:
                    logger.warning("Could not create channel, aborting (reraising exception)")
                    raise
                retries += 1
                # sleep_time is a float: log it with %s so sub-second values are not shown as 0
                logger.info("Could not create channel, retry #%d of %i in %s seconds", retries, max_retries, sleep_time)
                # TODO: Not Very performing, find a better way
                time.sleep(sleep_time)
            else:
                break

        logger.info("Current DataService autoregistered. Associated StorageLocationID: %s",
                    storage_location_id)
        settings.storage_id = storage_location_id
        settings.environment_id = execution_environment_id

        # Retrieve the storage_location connection data
        storage_location = lm_client.get_storage_location_for_ds(storage_location_id)

        logger.debug("StorageLocation data: {name: '%s', hostname: '%s', port: %d}",
                     storage_location.name,
                     storage_location.hostname,
                     storage_location.storageTCPPort)

        logger.info("Starting client to StorageLocation {%s} at %s:%d",
                    storage_location_id, storage_location.hostname, storage_location.storageTCPPort)

        storage_client = EEClient(storage_location.hostname, storage_location.storageTCPPort)

        # Leave the ready client to the Storage Location globally available
        self.execution_environment.get_runtime().ready_clients["@STORAGE"] = storage_client
        storage_client.associate_execution_environment(execution_environment_id)

        settings.logicmodule_dc_instance_id = lm_client.get_dataclay_id()
        logger.verbose("DataclayInstanceID is %s, storing client in cache", settings.logicmodule_dc_instance_id)

        # Make the LogicModule client reachable through the dataClay instance ID too
        self.execution_environment.get_runtime().ready_clients[settings.logicmodule_dc_instance_id] = self.execution_environment.get_runtime().ready_clients["@LM"]
0
    def update_object(self, into_object, from_object):
        """Update ``into_object`` with the state of ``from_object``.

        ``from_object`` is serialized recursively like a volatile parameter. The
        object IDs of the serialized volatiles are then rewritten: the ID of
        ``from_object`` becomes the ID of ``into_object`` and every other object
        gets a fresh random UUID. References between the serialized objects are
        remapped accordingly before sending the update to the backend where
        ``into_object`` is located.

        :param into_object: object that receives the new state
        :param from_object: object whose state is sent
        """
        session = self.get_session_id()

        target_backend = into_object.get_location()
        try:
            client = self.ready_clients[target_backend]
        except KeyError:
            env_info = self.get_execution_environment_info(target_backend)
            client = EEClient(env_info.hostname, env_info.port)
            self.ready_clients[target_backend] = client

        # We serialize objects like volatile parameters
        # TODO: modify serialize_params_or_return to not require this
        # The spec value is not used, see serialized_params_or_return
        payload = SerializationLibUtilsSingleton.serialize_params_or_return(
            params=[from_object],
            iface_bitmaps=None,
            params_spec={"object": "DataClayObject"},
            params_order=["object"],
            hint_volatiles=target_backend,
            runtime=self,
            recursive=True,
            for_update=True)

        volatiles = payload.vol_objs
        if volatiles is not None:
            remapped = {}

            # First pass: assign the new ID of every serialized object.
            for tag in volatiles:
                current = payload.vol_objs[tag]
                old_oid = current.object_id
                if old_oid not in remapped:
                    remapped[old_oid] = (
                        into_object.get_object_id()
                        if old_oid == from_object.get_object_id()
                        else uuid.uuid4()
                    )
                payload.vol_objs[tag] = ObjectWithDataParamOrReturn(
                    remapped[old_oid],
                    current.class_id,
                    current.metadata,
                    current.obj_bytes)

            # Second pass: point the references kept in the metadata to the new IDs.
            for vol_tag in volatiles:
                referenced = payload.vol_objs[vol_tag].metadata.tags_to_oids
                for inner_tag, old_ref in referenced.items():
                    if old_ref not in remapped:
                        continue
                    try:
                        payload.vol_objs[vol_tag].metadata.tags_to_oids[inner_tag] = remapped[old_ref]
                    except KeyError:
                        pass

        client.ds_update_object(session, into_object.get_object_id(), payload)
Пример #4
0
 def get_copy_of_object(self, from_object, recursive):
     """Ask the backend of ``from_object`` for a copy of it.

     :param from_object: object to copy
     :param recursive: forwarded to the backend call as-is
     :return: first element of the deserialized response
     """
     session = self.get_session_id()

     source_backend = from_object.get_location()
     # Reuse the cached client of the backend; create and cache it on a miss.
     try:
         client = self.ready_clients[source_backend]
     except KeyError:
         env_info = self.get_execution_environment_info(source_backend)
         client = EEClient(env_info.hostname, env_info.port)
         self.ready_clients[source_backend] = client

     serialized_copy = client.ds_get_copy_of_object(
         session, from_object.get_object_id(), recursive)
     deserialized = DeserializationLibUtilsSingleton.deserialize_params_or_return(
         serialized_copy, None, None, None, self)
     return deserialized[0]
Пример #5
0
    def consolidate_version(self, version_id, version_hint):
        """Consolidate a version by calling the backend that holds it.

        :param version_id: ID of the version object to consolidate
        :param version_hint: backend to call; when None the location of
            ``version_id`` is looked up with ``get_location``
        """
        # IMPORTANT NOTE: pyclay is not able to replicate/versionate/consolidate Java or other language objects
        self.logger.debug(f"Starting consolidate version of {version_id}")
        session = self.get_session_id()
        target_backend_id = (
            self.get_location(version_id) if version_hint is None else version_hint
        )

        # Reuse the cached client of the backend; create and cache it on a miss.
        try:
            client = self.ready_clients[target_backend_id]
        except KeyError:
            env_info = self.get_execution_environment_info(target_backend_id)
            client = EEClient(env_info.hostname, env_info.port)
            self.ready_clients[target_backend_id] = client

        client.consolidate_version(session, version_id)
        self.logger.debug(f"Finished consolidate version of {version_id}")
Пример #6
0
 def detach_object_from_session(self, object_id, hint):
     """Detach an object from the current session (best effort).

     Errors raised while locating the backend or performing the call are
     printed with ``traceback.print_exc`` and not propagated. Only
     ``Exception`` subclasses are swallowed, so ``KeyboardInterrupt`` and
     ``SystemExit`` still reach the caller.

     :param object_id: ID of the object to detach
     :param hint: backend to call; when None the location of ``object_id``
         is looked up with ``get_location``
     """
     try:
         cur_session = self.get_session_id()
         exec_location_id = hint
         if exec_location_id is None:
             exec_location_id = self.get_location(object_id)
         # Reuse the cached client of the backend; create and cache it on a miss.
         try:
             execution_client = self.ready_clients[exec_location_id]
         except KeyError:
             backend_to_call = self.get_execution_environment_info(
                 exec_location_id)
             execution_client = EEClient(backend_to_call.hostname,
                                         backend_to_call.port)
             self.ready_clients[exec_location_id] = execution_client
         execution_client.detach_object_from_session(object_id, cur_session)
     except Exception:
         # Deliberately best effort: report the failure and carry on.
         traceback.print_exc()
Пример #7
0
    def unfederate_from_backend(self, dc_obj,
                                external_execution_environment_id, recursive):
        """Ask the backend of ``dc_obj`` to unfederate it from an external backend.

        :param dc_obj: object to unfederate; its hint selects the backend to
            call, falling back to ``get_location`` when the hint is None
        :param external_execution_environment_id: ID of the external execution
            environment, forwarded to the backend call
        :param recursive: forwarded to the backend call as-is
        """
        object_id = dc_obj.get_object_id()
        hint = dc_obj.get_hint()
        session = self.get_session_id()
        self.logger.debug(
            "[==UnfederateObject==] Starting unfederation of object %s with ext backend %s, and session %s",
            object_id, external_execution_environment_id, session)

        target_backend_id = self.get_location(object_id) if hint is None else hint

        # Reuse the cached client of the backend; create and cache it on a miss.
        try:
            client = self.ready_clients[target_backend_id]
        except KeyError:
            env_info = self.get_execution_environment_info(target_backend_id)
            client = EEClient(env_info.hostname, env_info.port)
            self.ready_clients[target_backend_id] = client

        client.unfederate(
            session, object_id, external_execution_environment_id, recursive)
Пример #8
0
    def federate_to_backend(self, dc_obj, external_execution_environment_id,
                            recursive):
        """Ask the backend of ``dc_obj`` to federate it to an external backend.

        :param dc_obj: object to federate; its hint selects the backend to
            call, falling back to ``get_location`` when the hint is None
        :param external_execution_environment_id: ID of the external execution
            environment, forwarded to the backend call
        :param recursive: forwarded to the backend call as-is
        """
        object_id = dc_obj.get_object_id()
        hint = dc_obj.get_hint()
        session = self.get_session_id()

        target_backend_id = self.get_location(object_id) if hint is None else hint

        # Reuse the cached client of the backend; create and cache it on a miss.
        try:
            client = self.ready_clients[target_backend_id]
        except KeyError:
            env_info = self.get_execution_environment_info(target_backend_id)
            client = EEClient(env_info.hostname, env_info.port)
            self.ready_clients[target_backend_id] = client

        self.logger.debug(
            "[==FederateObject==] Starting federation of object by %s calling EE %s with dest dataClay %s, and session %s",
            object_id, target_backend_id, external_execution_environment_id,
            session)
        client.federate(
            session, object_id, external_execution_environment_id, recursive)
Пример #9
0
 def delete_alias(self, dc_obj):
     """Delete the alias of an object, remotely and in the local caches.

     The backend of ``dc_obj`` is asked to delete the alias, then the alias
     is dropped from ``alias_cache`` (if present) and reset to None on the
     object itself.

     :param dc_obj: object whose alias is deleted; its hint selects the
         backend to call, falling back to ``get_location`` when it is None
     """
     session = self.get_session_id()
     hint = dc_obj.get_hint()
     object_id = dc_obj.get_object_id()

     target_backend_id = self.get_location(object_id) if hint is None else hint

     # Reuse the cached client of the backend; create and cache it on a miss.
     try:
         client = self.ready_clients[target_backend_id]
     except KeyError:
         env_info = self.get_execution_environment_info(target_backend_id)
         client = EEClient(env_info.hostname, env_info.port)
         self.ready_clients[target_backend_id] = client

     client.delete_alias(session, object_id)

     current_alias = dc_obj.get_alias()
     if current_alias is not None and current_alias in self.alias_cache:
         del self.alias_cache[current_alias]
     dc_obj.set_alias(None)
Пример #10
0
 def synchronize(self, instance, operation_name, params):
     """Send a synchronize call for an operation of ``instance`` to its backend.

     ``params`` is wrapped in a one-element list and serialized with the
     parameter specification of the operation; the hint of ``instance`` is
     used as hint for volatiles.

     :param instance: object on which the operation is synchronized
     :param operation_name: name of the operation
     :param params: single argument of the operation
     """
     session = self.get_session_id()
     object_id = instance.get_object_id()
     target_backend_id = self.get_location(instance.get_object_id())
     op_info = self.get_operation_info(
         instance.get_object_id(), operation_name)
     impl_id = self.get_implementation_id(
         instance.get_object_id(), operation_name)

     # === SERIALIZE PARAMETER ===
     payload = SerializationLibUtilsSingleton.serialize_params_or_return(
         params=[params],
         iface_bitmaps=None,
         params_spec=op_info.params,
         params_order=op_info.paramsOrder,
         hint_volatiles=instance.get_hint(),
         runtime=self,
     )

     # Reuse the cached client of the backend; create and cache it on a miss.
     try:
         client = self.ready_clients[target_backend_id]
     except KeyError:
         env_info = self.get_execution_environment_info(target_backend_id)
         client = EEClient(env_info.hostname, env_info.port)
         self.ready_clients[target_backend_id] = client

     client.synchronize(session, object_id, impl_id, payload)
Пример #11
0
    def prepare_for_new_replica_version_consolidate(self, object_id, object_hint,
                                                    backend_id, backend_hostname,
                                                    different_location):
        """
        Helper function to prepare information for new replica - version - consolidate algorithms

        The destination backend is resolved in this order:

        1. the backend with ID ``backend_id``, when provided;
        2. the first execution environment found at ``backend_hostname``, when provided;
        3. if ``different_location`` is true, the first execution environment of this
           dataClay that is not among the locations of the object (the list of
           execution environments is refreshed once if none is found);
        4. the backend that is going to be called (object hint or object location).

        :param object_id: id of the object
        :param object_hint: backend to call; when None the location of the object is looked up
        :param backend_id: Destination backend ID to get information from (can be None)
        :param backend_hostname: Destination hostname to get information from (can be None)
        :param different_location: if true indicates that destination backend should be different to any location of the object
        :return: Tuple with the client of the backend to call and the information of the destination backend
        """
        backend_id_to_call = object_hint
        if backend_id_to_call is None:
            backend_id_to_call = self.get_location(object_id)

        dest_backend = None
        if backend_id is not None:
            dest_backend = self.get_execution_environment_info(backend_id)
        else:
            if backend_hostname is not None:
                exec_envs_at_host = self.get_all_execution_environments_at_host(backend_hostname)
                if len(exec_envs_at_host) > 0:
                    dest_backend = next(iter(exec_envs_at_host.values()))

            if dest_backend is None and different_location:
                # no destination specified, get one destination in which object is not already replicated
                obj_locations = self.get_all_locations(object_id)
                # IDs are compared through their string form, as locations and
                # exec env IDs may not be of the same type
                occupied = {str(obj_location) for obj_location in obj_locations}

                def first_free(exec_envs):
                    # Return the first exec env that holds no replica of the object
                    for exec_env_id, exec_env in exec_envs.items():
                        self.logger.debug(f"Checking if {exec_env_id} is in {obj_locations}")
                        if str(exec_env_id) not in occupied:
                            return exec_env
                    return None

                dest_backend = first_free(
                    self.get_all_execution_environments_at_dataclay(self.get_dataclay_id()))
                if dest_backend is None:
                    self.logger.debug("Could not find any different location for replica, updating available exec envs")
                    # retry updating locations
                    dest_backend = first_free(
                        self.get_all_execution_environments_at_dataclay(self.get_dataclay_id(), force_update=True))

            if dest_backend is None:
                # Last resort: the backend we are about to call. Using the resolved
                # ID (instead of the raw hint) also covers objects without hint.
                dest_backend = self.get_execution_environment_info(backend_id_to_call)

        # Reuse the cached client of the backend to call; create and cache it on a miss.
        try:
            execution_client = self.ready_clients[backend_id_to_call]
        except KeyError:
            backend_to_call = self.get_execution_environment_info(backend_id_to_call)
            execution_client = EEClient(backend_to_call.hostname, backend_to_call.port)
            self.ready_clients[backend_id_to_call] = execution_client
        return execution_client, dest_backend
Пример #12
0
    def call_execute_to_ds(self, instance, parameters, operation_name, exeenv_id, using_hint):
        """Execute an operation of ``instance`` in a remote execution environment.

        The parameters are serialized using ``exeenv_id`` as hint for volatiles and
        the call is attempted up to ``Configuration.MAX_EXECUTION_RETRIES`` times.
        When an attempt fails, and the failure is not attributed to a volatile
        parameter that is still being sent, another location of the object is
        chosen for the next attempt.

        :param instance: object on which the operation is executed
        :param parameters: parameters of the operation
        :param operation_name: name of the operation to execute
        :param exeenv_id: ID of the execution environment to try first
        :param using_hint: if true, the hint of ``instance`` is updated every time a
            new location is chosen after a failed attempt
        :return: deserialized result of the operation, or None if the call returned None
        :raises RuntimeError: if no attempt succeeded
        """

        object_id = instance.get_object_id()
        operation = self.get_operation_info(object_id, operation_name)
        session_id = self.get_session_id()
        implementation_id = self.get_implementation_id(object_id, operation_name)

        # // === SERIALIZE PARAMETERS === //
        serialized_params = SerializationLibUtilsSingleton.serialize_params_or_return(
            params=parameters,
            iface_bitmaps=None,
            params_spec=operation.params,
            params_order=operation.paramsOrder,
            hint_volatiles=exeenv_id,
            runtime=self)

        if serialized_params is not None and serialized_params.vol_objs is not None:
            for param in serialized_params.vol_objs.values():
                self.volatile_parameters_being_send.add(param.object_id)

        # // === EXECUTE === //
        max_retry = Configuration.MAX_EXECUTION_RETRIES
        num_misses = 0
        executed = False
        for _ in range(max_retry):
            try:
                self.logger.verbose("Obtaining API for remote execution in %s ", exeenv_id)
                execution_client = self.ready_clients[exeenv_id]
            except KeyError:
                exeenv = self.get_execution_environment_info(exeenv_id)
                self.logger.debug("Not found in cache ExecutionEnvironment {%s}! Starting it at %s:%d",
                               exeenv_id, exeenv.hostname, exeenv.port)
                execution_client = EEClient(exeenv.hostname, exeenv.port)
                self.ready_clients[exeenv_id] = execution_client

            try:
                self.logger.verbose("Calling remote EE %s ", exeenv_id)
                ret = execution_client.ds_execute_implementation(
                    object_id,
                    implementation_id,
                    session_id,
                    serialized_params)
                executed = True
                break

            except (DataClayException, RpcError) as dce:
                self.logger.warning("Execution resulted in an error, retrying...", exc_info=dce)

                # A persistent reference to an object that we are still sending as
                # volatile is treated as a race condition: retry in the same location.
                is_race_condition = False
                if serialized_params is not None and serialized_params.persistent_refs is not None:
                    for param in serialized_params.persistent_refs:
                        if param.object_id in self.volatile_parameters_being_send:
                            is_race_condition = True
                            break
                if not is_race_condition:
                    num_misses = num_misses + 1
                    self.logger.debug("Exception dataclay during execution. Retrying...")
                    self.logger.debug(str(dce))

                    # Reset on every miss: otherwise the flag is unbound (or stale from
                    # a previous retry) when the heap already provides the locations.
                    new_location = False
                    locations = self.get_from_heap(object_id).get_replica_locations()
                    if locations is None or len(locations) == 0:
                        try:
                            locations = self.get_metadata(object_id).locations
                        except DataClayException:
                            locations = None

                    if locations is None:
                        self.logger.warning("Execution failed and no metadata available. Cannot continue")
                        raise

                    for loc in locations:
                        self.logger.debug("Found location %s" % str(loc))
                        if loc != exeenv_id:
                            exeenv_id = loc
                            self.logger.debug("Found different location %s" % str(loc))
                            new_location = True
                            break

                    if not new_location:
                        exeenv_id = next(iter(locations))
                    if using_hint:
                        instance.set_hint(exeenv_id)
                    self.logger.debug("[==Miss Jump==] MISS. The object %s was not in the exec.location %s. Retrying execution."
                                % (instance.get_object_id(), str(exeenv_id)))

        if serialized_params is not None and serialized_params.vol_objs is not None:
            for param in serialized_params.vol_objs.values():
                if num_misses > 0:
                    #===========================================================
                    # if there was a miss, it means that the persistent object in which we were executing
                    # was not in the chosen location. As you can see in the serialize parameters function above
                    # we provide the execution environment as hint to set to volatile parameters. In EE, before
                    # deserialization of volatiles we check if the persistent object in which to execute a method is
                    # there, if not, EE raises an exception. Therefore, if there was a miss, we know that the
                    # hint we set in volatile parameters is wrong, because they are going to be deserialized/stored
                    # in the same location as the object with the method to execute
                    #===========================================================
                    param_instance = self.get_from_heap(param.object_id)
                    param_instance.set_hint(exeenv_id)
                self.volatile_parameters_being_send.remove(param.object_id)

        if not executed:
            raise RuntimeError("[dataClay] ERROR: Trying to execute remotely object  but something went wrong. "
                               "Maybe the object is still not stored (in case of asynchronous makepersistent) and "
                               "waiting time is not enough. Maybe the object does not exist anymore due to a remove. "
                               "Or Maybe an exception happened in the server and the call failed.")

        result = None
        if ret is None:
            self.logger.debug(f"Result of operation named {operation_name} received: None")
        else:
            self.logger.debug(f"Deserializing result of operation named {operation_name}, return type is {operation.returnType.signature}")
            result = DeserializationLibUtilsSingleton.deserialize_return(ret, None, operation.returnType, self)
            self.logger.debug(f"Deserialization of result of operation named {operation_name} successfully finished.")
        return result
Пример #13
0
    def make_persistent(self, instance, alias, backend_id, recursive):
        """ This method creates a new Persistent Object using the provided stub
        instance and, if indicated, all its associated objects also Logic module API used for communication
        This function is called from a stub/execution class

        If the instance is already persistent nothing is sent: only the local
        metadata cache entry of the object is refreshed.

        :param instance: Instance to make persistent
        :param backend_id: Indicates which is the destination backend
        :param recursive: Indicates if make persistent is recursive
        :param alias: Alias for the object
        :returns: ID of the backend in which the object was persisted.
        :type instance: DataClayObject
        :type backend_id: DataClayID
        :type recursive: boolean
        :type alias: string
        :rtype: DataClayID
        :raises RuntimeError: if backend id is UNDEFINED_LOCAL.
        """

        self.logger.debug(
            "Starting make persistent object for instance with id %s",
            instance.get_object_id())
        if backend_id is UNDEFINED_LOCAL:
            # This is a commonruntime end user pitfall,
            # @abarcelo thinks that it is nice
            # (and exceptionally detailed) error
            raise RuntimeError("""
                You are trying to use dataclay.api.LOCAL but either:
                  - dataClay has not been initialized properly
                  - LOCAL has been wrongly imported.
                
                Be sure to use LOCAL with:
                
                from dataclay import api
                
                and reference it with `api.LOCAL`
                
                Refusing the temptation to guess.""")
        location = instance.get_hint()
        if location is None:
            location = backend_id
            # Choose location if needed
            # If object is already persistent -> it must have a Hint (location = hint here)
            # If object is not persistent -> location is chosen (provided backend id or random, hash...).
            if location is None:
                location = self.choose_location(instance, alias)

        if not instance.is_persistent():
            if alias is not None:
                # Add a new alias to an object.
                # Use cases:
                # 1 - object was persisted without alias and not yet registered -> we need to register it with new alias.
                # 2 - object was persisted and it is already registered -> we only add a new alias
                # 3 - object was persisted with an alias and it must be already registered -> we add a new alias.

                # From client side, we cannot check if object is registered or not (we do not have isPendingToRegister like EE)
                # Therefore, we call LogicModule with all information for registration.
                reg_infos = list()
                reg_info = RegistrationInfo(
                    instance.get_object_id(),
                    instance.get_class_extradata().class_id,
                    self.get_session_id(), instance.get_dataset_id(), alias)
                reg_infos.append(reg_info)
                new_object_ids = self.ready_clients["@LM"].register_objects(
                    reg_infos, location, LANG_PYTHON)
                self.logger.debug(f"Received ids: {new_object_ids}")
                new_object_id = next(iter(new_object_ids))
                self.update_object_id(instance, new_object_id)

                self.alias_cache[alias] = (instance.get_object_id(),
                                           instance.get_class_extradata().class_id,
                                           location)

            # === MAKE PERSISTENT === #
            self.logger.debug("Instance with object ID %s being send to EE",
                              instance.get_object_id())
            # set the default master location
            instance.set_master_location(location)
            instance.set_alias(alias)
            # We serialize objects like volatile parameters
            parameters = list()
            parameters.append(instance)
            params_order = list()
            params_order.append("object")
            params_spec = dict()
            params_spec[
                "object"] = "DataClayObject"  # not used, see serialized_params_or_return
            serialized_objs = SerializationLibUtilsSingleton.serialize_params_or_return(
                params=parameters,
                iface_bitmaps=None,
                params_spec=params_spec,
                params_order=params_order,
                hint_volatiles=location,
                runtime=self,
                recursive=recursive)

            # Avoid some race-conditions in communication (make persistent + execute where
            # execute arrives before).
            # TODO: fix volatiles under deserialization support for __setstate__ and __getstate__
            self.add_volatiles_under_deserialization(
                serialized_objs.vol_objs.values())

            try:
                # Get EE
                try:
                    execution_client = self.ready_clients[location]
                except KeyError:
                    exeenv = self.get_execution_environment_info(location)
                    self.logger.debug(
                        "Not found in cache ExecutionEnvironment {%s}! Starting it at %s:%d",
                        location, exeenv.hostname, exeenv.port)
                    execution_client = EEClient(exeenv.hostname, exeenv.port)
                    self.ready_clients[location] = execution_client

                # Call EE
                self.logger.verbose("Calling make persistent to EE %s ", location)
                execution_client.make_persistent(settings.current_session_id,
                                                 serialized_objs.vol_objs.values())

                # update the hint with the location, and return it
                instance.set_hint(location)
            finally:
                # remove volatiles under deserialization, also when the call failed,
                # so that a failed make persistent does not leave them registered
                self.remove_volatiles_under_deserialization(
                    serialized_objs.vol_objs.values())

        object_id = instance.get_object_id()
        locations = set()
        locations.add(location)
        metadata_info = MetaDataInfo(object_id, False,
                                     instance.get_dataset_id(),
                                     instance.get_class_extradata().class_id,
                                     locations, alias, None)
        self.metadata_cache[object_id] = metadata_info
        return location