def __init__(self, cluster_ip: str):
     """
     description: Prepares a client bound to the explore/histogram API path.

     cluster_ip: Address of the cluster; it is prepended as-is to the API
     path to form the service URL.
     """
     api_path = "/api/learningOrchestra/v1/explore/histogram"
     service_url = f'{cluster_ip}{api_path}'

     # Plain attribute bookkeeping first, collaborators afterwards.
     self.__cluster_ip = cluster_ip
     self.__api_path = api_path
     self.__service_url = service_url

     self.__response_treat = ResponseTreat()
     self.__observer = Observer(cluster_ip)
     self.__entity_reader = EntityReader(service_url)
 def __init__(self, cluster_ip: str):
     """
     description: Prepares a client bound to the transform/projection API
     path.

     cluster_ip: Address of the cluster; it is prepended as-is to the API
     path to form the service URL.
     """
     api_path = "/api/learningOrchestra/v1/transform/projection"
     service_url = f'{cluster_ip}{api_path}'

     self.__api_path = api_path
     self.__service_url = service_url
     self.__response_treat = ResponseTreat()

     # The reader works on the service URL, the observer on the cluster.
     self.__cluster_ip = cluster_ip
     self.__entity_reader = EntityReader(service_url)
     self.__observer = Observer(cluster_ip)
class TransformProjection:
    """
    description: Client for the projection API path
    (/api/learningOrchestra/v1/transform/projection). It creates, lists,
    reads and deletes projections and offers a wait barrier for them.
    """
    # Keys of the JSON body sent when a projection is requested.
    __INPUT_NAME = "inputDatasetName"
    __OUTPUT_NAME = "outputDatasetName"
    __FIELDS = "names"

    def __init__(self, cluster_ip: str):
        """
        description: Builds the service URL and the helper objects used by
        the other methods.

        cluster_ip: Address of the cluster; it is prepended as-is to the API
        path to form the service URL.
        """
        self.__api_path = "/api/learningOrchestra/v1/transform/projection"
        self.__service_url = f'{cluster_ip}{self.__api_path}'
        self.__response_treat = ResponseTreat()
        self.__cluster_ip = cluster_ip
        self.__entity_reader = EntityReader(self.__service_url)
        self.__observer = Observer(self.__cluster_ip)

    def __post_projection(self,
                          dataset_name: str,
                          projection_name: str,
                          fields: list):
        """
        description: Sends the projection creation request shared by the
        sync and async entry points.

        return: The raw response of the POST request.
        """
        request_body = {
            self.__INPUT_NAME: dataset_name,
            self.__OUTPUT_NAME: projection_name,
            self.__FIELDS: fields,
        }
        return requests.post(url=self.__service_url, json=request_body)

    def remove_dataset_attributes_sync(self,
                                       dataset_name: str,
                                       projection_name: str,
                                       fields: list,
                                       pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method removes a set of attributes of a dataset
        synchronously, the caller waits until the projection is inserted into
        the Learning Orchestra storage mechanism.

        pretty_response: If true it returns a string, otherwise a dictionary.
        projection_name: Represents the projection name.
        dataset_name: Represents the dataset name.
        fields: Represents the set of attributes to be removed. This is a
        list with some attributes.

        return: A JSON object with error or warning messages. In case of
        success, it returns the projection metadata.
        """
        response = self.__post_projection(
            dataset_name, projection_name, fields)

        # The barrier must be on the projection being created; waiting on
        # the input dataset would not synchronize with the new projection.
        self.__observer.wait(projection_name)

        return self.__response_treat.treatment(response, pretty_response)

    def remove_dataset_attributes_async(self,
                                        dataset_name: str,
                                        projection_name: str,
                                        fields: list,
                                        pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method removes a set of attributes of a dataset
        asynchronously; this way, the caller does not wait until the
        projection is inserted into the Learning Orchestra storage mechanism.
        A wait method call must occur to guarantee a synchronization barrier.

        pretty_response: If true it returns a string, otherwise a dictionary.
        projection_name: Represents the projection name.
        dataset_name: Represents the dataset name.
        fields: Represents the set of attributes to be removed. This is a
        list with some attributes.

        return: A JSON object with error or warning messages. In case of
        success, it returns the projection URL to be obtained later with a
        wait method call.
        """
        response = self.__post_projection(
            dataset_name, projection_name, fields)

        return self.__response_treat.treatment(response, pretty_response)

    def search_all_projections(self, pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method retrieves all projection metadata, i.e., it
        does not retrieve the projection content.

        pretty_response: If true it returns a string, otherwise a dictionary.

        return: A list with all projections metadata stored in Learning
        Orchestra or an empty result.
        """
        response = self.__entity_reader.read_all_instances_from_entity()
        return self.__response_treat.treatment(response, pretty_response)

    def search_projection_content(self,
                                  projection_name: str,
                                  query: Union[dict, None] = None,
                                  limit: int = 10,
                                  skip: int = 0,
                                  pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method is responsible for retrieving the projection
        content.

        pretty_response: If true it returns a string, otherwise a dictionary.
        projection_name: Represents the projection name.
        query: Query to make in MongoDB(default: empty query)
        limit: Number of rows to return in pagination(default: 10) (maximum is
        set at 20 rows per request)
        skip: Number of rows to skip in pagination(default: 0)

        return: A page with some tuples or registers inside or an error if
        there is no such projection. The current page is also returned to be
        used in future content requests.
        """
        # A fresh dict per call: a literal {} default would be one shared
        # object that any callee mutation leaks into later calls.
        if query is None:
            query = {}

        response = self.__entity_reader.read_entity_content(
            projection_name, query, limit, skip)

        return self.__response_treat.treatment(response, pretty_response)

    def delete_projection(self, projection_name: str,
                          pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method is responsible for deleting a projection.
        The delete operation is always asynchronous and performed in
        background.

        pretty_response: If true it returns a string, otherwise a dictionary.
        projection_name: Represents the projection name.

        return: JSON object with a warning message or a correct delete
        message.

        raises: The exception raised by response.raise_for_status() when the
        service answers with an HTTP error status.
        """
        cluster_url_projection = f'{self.__service_url}/{projection_name}'

        response = requests.delete(cluster_url_projection)
        # HTTP error statuses surface as an exception before any treatment.
        response.raise_for_status()

        return self.__response_treat.treatment(response, pretty_response)

    def wait(self, projection_name: str, timeout: int = None) -> dict:
        """
        description: This method is responsible to create a synchronization
        barrier for the remove_dataset_attributes_async method,
        delete_projection method.

        projection_name: Represents the projection name.
        timeout: Represents the time in seconds to wait for a projection to
        finish its run.

        return: JSON object with an error message, a warning message or a
        correct projection result
        """
        return self.__observer.wait(projection_name, timeout)
示例#4
0
class Transform:
    """
    description: Generic client for a transform API path. The concrete path
    is supplied by the caller and appended to the cluster address.
    """
    # Keys of the JSON body sent when a transform is requested.
    __PARENT_NAME_FIELD = "parentName"
    __MODEL_NAME_FIELD = "modelName"
    __METHOD_NAME_FIELD = "method"
    __ClASS_PARAMETERS_FIELD = "methodParameters"
    __NAME_FIELD = "name"
    __DESCRIPTION_FIELD = "description"

    def __init__(self, cluster_ip: str, api_path: str):
        """
        description: Builds the service URL and the helper objects used by
        the other methods.

        cluster_ip: Address of the cluster; it is prepended as-is to
        api_path.
        api_path: Path of the transform service inside the cluster.
        """
        self.__service_url = f'{cluster_ip}{api_path}'
        self.__response_treat = ResponseTreat()
        self.__cluster_ip = cluster_ip
        self.__entity_reader = EntityReader(self.__service_url)
        self.__observer = Observer(self.__cluster_ip)

    def __post_transform(self,
                         name: str,
                         model_name: str,
                         parent_name: str,
                         method_name: str,
                         parameters: dict,
                         description: str):
        """
        description: Sends the transform creation request shared by the sync
        and async entry points.

        return: The raw response of the POST request.
        """
        request_body = {
            self.__NAME_FIELD: name,
            self.__MODEL_NAME_FIELD: model_name,
            self.__PARENT_NAME_FIELD: parent_name,
            self.__METHOD_NAME_FIELD: method_name,
            self.__ClASS_PARAMETERS_FIELD: parameters,
            self.__DESCRIPTION_FIELD: description
        }
        return requests.post(url=self.__service_url, json=request_body)

    def create_transform_sync(self,
                              name: str,
                              model_name: str,
                              parent_name: str,
                              method_name: str,
                              parameters: dict,
                              description: str = "",
                              pretty_response: bool = False) -> \
            Union[dict, str]:
        """
        description: This method is responsible to transform datasets in sync
        mode

        pretty_response: If true it returns a string, otherwise a dictionary.
        name: Is the name of the transform output object that will be created.
        model_name: Value sent to the service in the modelName field.
        parent_name: Is the name of the previous ML step of the pipeline
        method_name: is the name of the method to be executed
        (the ML tool way to transform datasets)
        parameters: Is the set of parameters used by the method
        description: Free text sent to the service in the description field.

        return: A JSON object with an error or warning message or a URL
        indicating the correct operation.
        """
        response = self.__post_transform(
            name, model_name, parent_name, method_name, parameters,
            description)
        # Block until the observer reports on the new transform object.
        self.__observer.wait(name)

        return self.__response_treat.treatment(response, pretty_response)

    def create_transform_async(self,
                               name: str,
                               model_name: str,
                               parent_name: str,
                               method_name: str,
                               parameters: dict,
                               description: str = "",
                               pretty_response: bool = False) -> \
            Union[dict, str]:
        """
        description: This method is responsible to transform datasets in async
        mode. The wait method must be called to guarantee a synchronization
        barrier.

        pretty_response: If true it returns a string, otherwise a dictionary.
        name: Is the name of the transform output object that will be created.
        model_name: Value sent to the service in the modelName field.
        parent_name: Is the name of the previous ML step of the pipeline
        method_name: is the name of the method to be executed (the ML tool way
        to transform datasets)
        parameters: Is the set of parameters used by the method
        description: Free text sent to the service in the description field.

        return: A JSON object with an error or warning message or a URL
        indicating the correct operation.
        """
        response = self.__post_transform(
            name, model_name, parent_name, method_name, parameters,
            description)
        return self.__response_treat.treatment(response, pretty_response)

    def search_all_transformations(self, pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method retrieves all transform metadata, i.e., it
        does not retrieve the transform content.

        pretty_response: If true it returns a string, otherwise a dictionary.

        return: All transform metadata stored in Learning Orchestra or an
        empty result.
        """
        response = self.__entity_reader.read_all_instances_from_entity()
        return self.__response_treat.treatment(response, pretty_response)

    def delete_transform(self, name: str, pretty_response=False) \
            -> Union[dict, str]:
        """
        description: This method is responsible for deleting a transform step.
        This delete operation is asynchronous, so it does not lock the caller
        until the deletion finished. Instead, it returns a JSON object with a
        URL for a future use. The caller uses the URL for delete checks.

        pretty_response: If true it returns a string, otherwise a dictionary.
        name: Represents the transform name.

        return: JSON object with an error message, a warning message or a
        correct delete message
        """
        request_url = f'{self.__service_url}/{name}'

        response = requests.delete(request_url)
        return self.__response_treat.treatment(response, pretty_response)

    def search_transform_content(self,
                                 name: str,
                                 query: Union[dict, None] = None,
                                 limit: int = 10,
                                 skip: int = 0,
                                 pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description:  This method is responsible for retrieving a transform
        URL, which is useful to obtain the transform plottable content, as
        well as the metadata content

        pretty_response: If true it returns a string, otherwise a dictionary.
        name: Is the name of the transform object
        query: Query to make in MongoDB(default: empty query)
        limit: Number of rows to return in pagination(default: 10) (maximum is
        set at 20 rows per request)
        skip: Number of rows to skip in pagination(default: 0)

        return: A page with transform content and metadata inside or an error
        if there is no such transform object. The current page is also
        returned to be used in future content requests.
        """
        # A fresh dict per call: a literal {} default would be one shared
        # object that any callee mutation leaks into later calls.
        if query is None:
            query = {}

        response = self.__entity_reader.read_entity_content(
            name, query, limit, skip)

        return self.__response_treat.treatment(response, pretty_response)

    def wait(self, name: str, timeout: int = None) -> dict:
        """
        description: This method is responsible to create a synchronization
        barrier for the create_transform_async method, delete_transform
        method.

        name: Represents the transform name.
        timeout: Represents the time in seconds to wait for a transform step
        to finish its run.

        return: JSON object with an error message, a warning message or a
        correct transform result
        """
        return self.__observer.wait(name, timeout)
示例#5
0
 def __init__(self, cluster_ip: str, api_path: str):
     """
     description: Prepares a client bound to the given API path.

     cluster_ip: Address of the cluster; it is prepended as-is to api_path.
     api_path: Path of the service inside the cluster.
     """
     service_url = f'{cluster_ip}{api_path}'

     self.__service_url = service_url
     self.__response_treat = ResponseTreat()

     # The reader works on the service URL, the observer on the cluster.
     self.__cluster_ip = cluster_ip
     self.__entity_reader = EntityReader(service_url)
     self.__observer = Observer(cluster_ip)
class TransformDataType:
    """
    description: Client for the data type API path
    (/api/learningOrchestra/v1/transform/dataType). It changes dataset field
    types and lists, reads, deletes and waits for datatype objects.
    """
    # Keys of the JSON body sent when a type update is requested.
    __INPUT_NAME = "inputDatasetName"
    __TYPES = "types"

    def __init__(self, cluster_ip: str):
        """
        description: Builds the service URL and the helper objects used by
        the other methods.

        cluster_ip: Address of the cluster; it is prepended as-is to the API
        path to form the service URL.
        """
        self.__api_path = "/api/learningOrchestra/v1/transform/dataType"
        self.__service_url = f'{cluster_ip}{self.__api_path}'
        self.__response_treat = ResponseTreat()
        self.__cluster_ip = cluster_ip
        self.__entity_reader = EntityReader(self.__service_url)
        self.__observer = Observer(self.__cluster_ip)

    def __patch_types(self, dataset_name: str, types: dict):
        """
        description: Sends the type update request shared by the sync and
        async entry points.

        return: The raw response of the PATCH request.
        """
        body_request = {self.__INPUT_NAME: dataset_name, self.__TYPES: types}
        return requests.patch(url=self.__service_url, json=body_request)

    def update_dataset_type_sync(self,
                                 dataset_name: str,
                                 types: dict,
                                 pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: Change dataset field types (from number to string and
        vice-versa). Many type modifications can be performed in one method
        call. The caller waits on the dataset before the result is returned.

        dataset_name: Represents the dataset name.
        types: Represents a map, where the pair key:value is a field:type
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: A JSON object with error or warning messages or a correct
        datatype result.
        """
        response = self.__patch_types(dataset_name, types)
        # The request names only the input dataset, so the barrier is on it.
        self.__observer.wait(dataset_name)

        return self.__response_treat.treatment(response, pretty_response)

    def update_dataset_type_async(self,
                                  dataset_name: str,
                                  types: dict,
                                  pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: Change dataset field types (from number to string and
        vice-versa). Many type modifications can be performed in one method
        call. It is an asynchronous call, thus a wait method must be also
        called to guarantee a synchronization barrier.

        dataset_name: Represents the dataset name.
        types: Represents a map, where the pair key:value is a field:type
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: A JSON object with error or warning messages or a correct
        datatype result.
        """
        response = self.__patch_types(dataset_name, types)
        return self.__response_treat.treatment(response, pretty_response)

    def search_all_datatype(self, pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method retrieves all datatype metadata, i.e., it
        does not retrieve the datatype content.

        pretty_response: If true it returns a string, otherwise a dictionary.

        return: All datatype metadata stored in Learning Orchestra or an
        empty result.
        """
        response = self.__entity_reader.read_all_instances_from_entity()
        return self.__response_treat.treatment(response, pretty_response)

    def delete_datatype(self, name: str, pretty_response=False) \
            -> Union[dict, str]:
        """
        description: This method is responsible for deleting the datatype
        step. This delete operation is asynchronous, so it does not lock the
        caller until the deletion finished. Instead, it returns a JSON object
        with a URL for a future use. The caller uses the URL for delete
        checks.

        pretty_response: If true it returns a string, otherwise a dictionary.
        name: Represents the datatype name.

        return: JSON object with an error message, a warning message or a
        correct delete message
        """
        request_url = f'{self.__service_url}/{name}'

        response = requests.delete(request_url)
        return self.__response_treat.treatment(response, pretty_response)

    def search_datatype_content(self,
                                name: str,
                                query: Union[dict, None] = None,
                                limit: int = 10,
                                skip: int = 0,
                                pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description:  This method is responsible for retrieving all the
        datatype tuples or registers, as well as the metadata content

        pretty_response: If true it returns a string, otherwise a dictionary.
        name: Is the name of the datatype object
        query: Query to make in MongoDB(default: empty query)
        limit: Number of rows to return in pagination(default: 10) (maximum is
        set at 20 rows per request)
        skip: Number of rows to skip in pagination(default: 0)

        return: A page with some registers or tuples inside or an error if
        there is no such datatype object. The current page is also returned to
        be used in future content requests.
        """
        # A fresh dict per call: a literal {} default would be one shared
        # object that any callee mutation leaks into later calls.
        if query is None:
            query = {}

        response = self.__entity_reader.read_entity_content(
            name, query, limit, skip)

        return self.__response_treat.treatment(response, pretty_response)

    def wait(self, dataset_name: str, timeout: int = None) -> dict:
        """
        description: This method is responsible to create a synchronization
        barrier for the update_dataset_type_async method, delete_datatype
        method.

        dataset_name: Represents the name of the dataset to wait for.
        timeout: Represents the time in seconds to wait for a datatype to
        finish its run.

        return: JSON object with an error message, a warning message or a
        correct datatype result
        """
        return self.__observer.wait(dataset_name, timeout)
class Explore:
    """
    description: Generic client for an explore API path. The concrete path
    is supplied by the caller and appended to the cluster address.
    """
    # Keys of the JSON body sent when an explore is requested.
    __PARENT_NAME_FIELD = "parentName"
    __MODEL_NAME_FIELD = "modelName"
    __METHOD_NAME_FIELD = "method"
    __ClASS_PARAMETERS_FIELD = "methodParameters"
    __NAME_FIELD = "name"
    __DESCRIPTION_FIELD = "description"

    def __init__(self, cluster_ip: str, api_path: str):
        """
        description: Builds the service URL and the helper objects used by
        the other methods.

        cluster_ip: Address of the cluster; it is prepended as-is to
        api_path.
        api_path: Path of the explore service inside the cluster.
        """
        self.__service_url = f'{cluster_ip}{api_path}'
        self.__response_treat = ResponseTreat()
        self.__cluster_ip = cluster_ip
        self.__entity_reader = EntityReader(self.__service_url)
        self.__observer = Observer(self.__cluster_ip)

    def __submit_explore(self, name, model_name, parent_name, method_name,
                         parameters, description):
        """
        description: Posts the explore creation request used by both the
        sync and the async entry points.

        return: The raw response of the POST request.
        """
        payload = {
            self.__NAME_FIELD: name,
            self.__MODEL_NAME_FIELD: model_name,
            self.__PARENT_NAME_FIELD: parent_name,
            self.__METHOD_NAME_FIELD: method_name,
            self.__ClASS_PARAMETERS_FIELD: parameters,
            self.__DESCRIPTION_FIELD: description,
        }
        return requests.post(url=self.__service_url, json=payload)

    def create_explore_sync(self,
                            name: str,
                            model_name: str,
                            parent_name: str,
                            method_name: str,
                            parameters: dict,
                            description: str = "",
                            pretty_response: bool = False) -> \
            Union[dict, str]:
        """
        description: Requests an explore step and waits on it before
        returning (sync mode).

        pretty_response: If true it returns a string, otherwise a dictionary.
        name: Name sent in the request; it is also the name waited on.
        model_name: Value sent to the service in the modelName field.
        parent_name: The name of the previous pipe in the pipeline
        method_name: the name of the ML tool method used to explore a model
        parameters: the set of parameters of the ML method defined previously
        description: Free text sent to the service in the description field.

        return: A JSON object with an error or warning message or a URL
        indicating the correct operation.
        """
        creation_response = self.__submit_explore(
            name, model_name, parent_name, method_name, parameters,
            description)

        # Synchronization barrier before the response is treated.
        self.__observer.wait(name)

        return self.__response_treat.treatment(
            creation_response, pretty_response)

    def create_explore_async(self,
                             name: str,
                             model_name: str,
                             parent_name: str,
                             method_name: str,
                             parameters: dict,
                             description: str = "",
                             pretty_response: bool = False) -> \
            Union[dict, str]:
        """
        description: Requests an explore step without waiting for it (async
        mode).

        pretty_response: If true it returns a string, otherwise a dictionary.
        name: Name sent in the request.
        model_name: Value sent to the service in the modelName field.
        parent_name: The name of the previous pipe in the pipeline
        method_name: the name of the ML tool method used to explore a model
        parameters: the set of parameters of the ML method defined previously
        description: Free text sent to the service in the description field.

        return: A JSON object with an error or warning message or a URL
        indicating the correct operation.
        """
        creation_response = self.__submit_explore(
            name, model_name, parent_name, method_name, parameters,
            description)

        return self.__response_treat.treatment(
            creation_response, pretty_response)

    def search_all_explores(self, pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: Lists every created exploration; the content of a
        specific explore is not retrieved.

        pretty_response: If true it returns a string, otherwise a dictionary.

        return: All explore metadata stored in Learning Orchestra or an empty
        result.
        """
        listing = self.__entity_reader.read_all_instances_from_entity()
        return self.__response_treat.treatment(listing, pretty_response)

    def delete_explore(self, name: str, pretty_response=False) \
            -> Union[dict, str]:
        """
        description: Deletes an explore result. The deletion is asynchronous,
        so the caller is not locked until it finishes; a JSON object with a
        URL for future use is returned instead and the wait method can be
        used for delete checks. If a dataset was used by another task (Ex.
        projection, histogram, tune and so forth), it cannot be deleted.

        pretty_response: If true it returns a string, otherwise a dictionary.
        name: Name appended to the service URL to address the explore.

        return: JSON object with an error message, a warning message or a
        correct delete message
        """
        target_url = f'{self.__service_url}/{name}'
        deletion_response = requests.delete(target_url)

        return self.__response_treat.treatment(
            deletion_response, pretty_response)

    def search_explore_image(self,
                             name: str,
                             pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: Retrieves the explore image to be plotted.

        pretty_response: If true it returns a string, otherwise a dictionary.
        name: Is the name of the explore instance.

        return: An URL with a link for an image or an error if there
        is no such result.
        """
        image = self.__entity_reader.read_entity_content(name)
        return self.__response_treat.treatment(image, pretty_response)

    def search_explore_metadata(self,
                                name: str,
                                pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: Retrieves the metadata of the explore image.

        pretty_response: If true it returns a string, otherwise a dictionary.
        name: Is the name of the explore instance.

        return: A page with some metadata inside or an error if there
        is no such dataset. The current page is also returned to be used in
        future content requests.
        """
        metadata = self.__entity_reader.read_explore_image_metadata(name)
        return self.__response_treat.treatment(metadata, pretty_response)

    def wait(self, name: str, timeout: int = None) -> dict:
        """
        description: Synchronization barrier for the create_explore_async and
        delete_explore methods.

        name: Name of the object to wait for.
        timeout: Represents the time in seconds to wait for an explore to
        finish its run.

        return: JSON object with an error message, a warning message or a
        correct explore result (the image URL as an explore result)
        """
        return self.__observer.wait(name, timeout)
示例#8
0
class BuilderSparkMl:
    """
    description: Client for the Spark ML builder API path
    (/api/learningOrchestra/v1/builder/sparkml). It runs pipelines and
    lists, reads, deletes and waits for their results.
    """
    # Keys of the JSON body sent when a pipeline run is requested.
    __TRAIN_FIELD = "trainDatasetName"
    __TEST_FIELD = "testDatasetName"
    __CODE_FIELD = "modelingCode"
    __CLASSIFIERS_LIST_FIELD = "classifiersList"

    def __init__(self, cluster_ip: str):
        """
        description: Builds the service URL and the helper objects used by
        the other methods.

        cluster_ip: Address of the cluster; it is prepended as-is to the API
        path to form the service URL.
        """
        self.__api_path = "/api/learningOrchestra/v1/builder/sparkml"
        self.__service_url = f'{cluster_ip}{self.__api_path}'
        self.__response_treat = ResponseTreat()
        self.__cluster_ip = cluster_ip
        self.__entity_reader = EntityReader(self.__service_url)
        self.__observer = Observer(self.__cluster_ip)

    def __post_pipeline(self,
                        train_dataset_name: str,
                        test_dataset_name: str,
                        modeling_code: str,
                        model_classifiers: list):
        """
        description: Sends the pipeline run request shared by the sync and
        async entry points.

        return: The raw response of the POST request.
        """
        request_body_content = {
            self.__TRAIN_FIELD: train_dataset_name,
            self.__TEST_FIELD: test_dataset_name,
            self.__CODE_FIELD: modeling_code,
            self.__CLASSIFIERS_LIST_FIELD: model_classifiers,
        }
        return requests.post(url=self.__service_url,
                             json=request_body_content)

    def run_spark_ml_sync(self,
                          train_dataset_name: str,
                          test_dataset_name: str,
                          modeling_code: str,
                          model_classifiers: list,
                          pretty_response: bool = False) -> Union[dict, str]:
        """
        description: This method call runs several steps of a machine
        learning pipeline (transform, tune, train and evaluate, for instance)
        using a model code and several classifiers. It represents a way to run
        an entire pipeline. The caller waits until the method execution ends,
        since it is a synchronous method.

        train_dataset_name: Represent final train dataset.
        test_dataset_name: Represent final test dataset.
        modeling_code: Represent Python3 code for pyspark pre-processing model
        model_classifiers: list of initial classifiers to be used in the model
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: The set of predictions (URIs of them).
        """
        response = self.__post_pipeline(
            train_dataset_name, test_dataset_name, modeling_code,
            model_classifiers)

        # One barrier per classifier; the awaited name is the test dataset
        # name immediately followed by the classifier name.
        for classifier in model_classifiers:
            self.__observer.wait(f'{test_dataset_name}{classifier}')

        return self.__response_treat.treatment(response, pretty_response)

    def run_spark_ml_async(self,
                           train_dataset_name: str,
                           test_dataset_name: str,
                           modeling_code: str,
                           model_classifiers: list,
                           pretty_response: bool = False) -> Union[dict, str]:
        """
        description: This method call runs several steps of a machine
        learning pipeline (transform, tune, train and evaluate, for instance)
        using a model code and several classifiers. It represents a way to run
        an entire pipeline. The caller does not wait until the method
        execution ends, since it is an asynchronous method.

        train_dataset_name: Represent final train dataset.
        test_dataset_name: Represent final test dataset.
        modeling_code: Represent Python3 code for pyspark pre-processing model
        model_classifiers: list of initial classifiers to be used in the model
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: the URL to retrieve the Spark pipeline result
        """
        response = self.__post_pipeline(
            train_dataset_name, test_dataset_name, modeling_code,
            model_classifiers)

        return self.__response_treat.treatment(response, pretty_response)

    def search_all_builders(self, pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method retrieves all model predictions metadata. It
        does not retrieve the model predictions content.

        pretty_response: If true it returns a string, otherwise a dictionary.

        return: A list with all model predictions metadata stored in Learning
        Orchestra or an empty result.
        """
        response = self.__entity_reader.read_all_instances_from_entity()

        return self.__response_treat.treatment(response, pretty_response)

    def search_builder_register_predictions(self,
                                            builder_name: str,
                                            query: Union[dict, None] = None,
                                            limit: int = 10,
                                            skip: int = 0,
                                            pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method is responsible for retrieving the model
        predictions content.

        pretty_response: If true it returns a string, otherwise a dictionary.
        builder_name: Represents the model predictions name.
        query: Query to make in MongoDB(default: empty query)
        limit: Number of rows to return in pagination(default: 10) (maximum is
        set at 20 rows per request)
        skip: Number of rows to skip in pagination(default: 0)

        return: A page with some tuples or registers inside or an error if the
        pipeline runs incorrectly. The current page is also returned to be
        used in future content requests.
        """
        # A fresh dict per call: a literal {} default would be one shared
        # object that any callee mutation leaks into later calls.
        if query is None:
            query = {}

        response = self.__entity_reader.read_entity_content(
            builder_name, query, limit, skip)

        return self.__response_treat.treatment(response, pretty_response)

    def search_builder(self, builder_name: str, pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description:  This method is responsible for retrieving a specific
        model metadata. It reads the builder content with an empty query and
        a page limited to a single row.

        pretty_response: If true return indented string, else return dict.
        builder_name: Represents the model predictions name.

        return: Specific model prediction metadata stored in Learning
        Orchestra or an error if there is no such builder.
        """
        return self.search_builder_register_predictions(
            builder_name, limit=1, pretty_response=pretty_response)

    def delete_builder(self, builder_name: str, pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method is responsible for deleting a model
        prediction. The delete operation is always asynchronous,
        since the deletion is performed in background.

        pretty_response: If true it returns a string, otherwise a dictionary.
        builder_name: Represents the pipeline name.

        return: JSON object with an error message, a warning message or a
        correct delete message
        """
        cluster_url_dataset = f'{self.__service_url}/{builder_name}'

        response = requests.delete(cluster_url_dataset)

        return self.__response_treat.treatment(response, pretty_response)

    def wait(self, dataset_name: str, timeout: int = None) -> dict:
        """
        description: This method is responsible to create a synchronization
        barrier for the run_spark_ml_async method.

        dataset_name: Represents the pipeline name.
        timeout: Represents the time in seconds to wait for a builder to
        finish its run.

        return: JSON object with an error message, a warning message or a
        correct execution of a pipeline
        """
        return self.__observer.wait(dataset_name, timeout)
示例#9
0
class Model:
    __CLASS_FIELD = "class"
    __MODULE_PATH_FIELD = "modulePath"
    __ClASS_PARAMETERS_FIELD = "classParameters"
    __NAME_FIELD = "modelName"
    __DESCRIPTION_FIELD = "description"

    def __init__(self, cluster_ip: str, api_path: str):
        """
        description: Prepares the helpers shared by every request of this
        client.

        cluster_ip: Cluster address; it prefixes the service URL and is also
        handed to the observer.
        api_path: Path of the model service, appended to cluster_ip.
        """
        self.__cluster_ip = cluster_ip
        self.__service_url = f'{cluster_ip}{api_path}'
        self.__response_treat = ResponseTreat()
        self.__entity_reader = EntityReader(self.__service_url)
        self.__observer = Observer(self.__cluster_ip)

    def __creation_payload(self,
                           name: str,
                           module_path: str,
                           class_name: str,
                           class_parameters: dict,
                           description: str) -> dict:
        """
        description: Assembles the JSON body shared by the sync and async
        model creation requests.

        return: A dictionary keyed by the request field names of this class.
        """
        return {
            self.__NAME_FIELD: name,
            self.__CLASS_FIELD: class_name,
            self.__MODULE_PATH_FIELD: module_path,
            self.__ClASS_PARAMETERS_FIELD: class_parameters,
            self.__DESCRIPTION_FIELD: description
        }

    def create_model_sync(self,
                          name: str,
                          module_path: str,
                          class_name: str,
                          class_parameters: dict,
                          description: str = "",
                          pretty_response: bool = False) -> Union[dict, str]:
        """
        description: Requests a model creation and blocks on the observer
        until the wait on the model name returns (sync mode).

        pretty_response: If true it returns a string, otherwise a dictionary.
        name: Is the name of the model that will be created.
        class_name: Is the name of the class to be executed.
        module_path: The name of the package of the ML tool used
        (Ex. Scikit-learn or TensorFlow).
        class_parameters: The set of parameters of the ML class defined
        previously.
        description: Free text sent along with the request (default: empty).

        return: A JSON object with an error or warning message or a URL
        indicating the correct operation.
        """
        payload = self.__creation_payload(
            name, module_path, class_name, class_parameters, description)

        creation_response = requests.post(url=self.__service_url,
                                          json=payload)
        # The barrier runs before the response is treated.
        self.__observer.wait(name)

        return self.__response_treat.treatment(
            creation_response, pretty_response)

    def create_model_async(self,
                           name: str,
                           module_path: str,
                           class_name: str,
                           class_parameters: dict,
                           description: str = "",
                           pretty_response: bool = False) -> Union[dict, str]:
        """
        description: Requests a model creation without waiting for it
        (async mode); use the wait method to synchronize afterwards.

        pretty_response: If true it returns a string, otherwise a dictionary.
        name: Is the name of the model that will be created.
        class_name: Is the name of the class to be executed.
        module_path: The name of the package of the ML tool used
        (Ex. Scikit-learn or TensorFlow).
        class_parameters: The set of parameters of the ML class defined
        previously.
        description: Free text sent along with the request (default: empty).

        return: A JSON object with an error or warning message or a URL
        indicating the future correct operation.
        """
        payload = self.__creation_payload(
            name, module_path, class_name, class_parameters, description)

        creation_response = requests.post(url=self.__service_url,
                                          json=payload)

        return self.__response_treat.treatment(
            creation_response, pretty_response)

    def search_all_models(self, pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: Reads the metadata of every model through the entity
        reader; the model content itself is not retrieved.

        pretty_response: If true it returns a string, otherwise a dictionary.

        return: All models metadata stored in Learning Orchestra or an empty
        result.
        """
        all_models = self.__entity_reader.read_all_instances_from_entity()

        return self.__response_treat.treatment(all_models, pretty_response)

    def delete_model(self, name: str, pretty_response=False) \
            -> Union[dict, str]:
        """
        description: Issues an HTTP DELETE for the given model. The call does
        not lock the caller until the deletion finishes; it returns a JSON
        object with a URL the caller can use for later delete checks.

        pretty_response: If true it returns a string, otherwise a dictionary.
        name: Represents the model name; it is appended to the service URL.

        return: JSON object with an error message, a warning message or a
        correct delete message
        """
        delete_response = requests.delete(f'{self.__service_url}/{name}')

        return self.__response_treat.treatment(
            delete_response, pretty_response)

    def search_model(self, name: str, pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: Reads one model metadata through the entity reader; the
        model content itself is not retrieved.

        pretty_response: If true it returns a string, otherwise a dictionary.
        name: Is the model name.

        return: A model metadata stored in Learning Orchestra or an empty
        result.
        """
        model_metadata = self.__entity_reader.read_entity_content(name)

        return self.__response_treat.treatment(
            model_metadata, pretty_response)

    def wait(self, name: str, timeout: int = None) -> dict:
        """
           description: Synchronization barrier for the create_model_async
           and delete_model methods; the call is delegated to the observer.

           name: Represents the model name.
           timeout: Represents the time in seconds to wait for a model
           creation to finish its run.

           return: JSON object with an error message, a warning message or a
           correct model result
        """
        outcome = self.__observer.wait(name, timeout)
        return outcome
示例#10
0
class Dataset:
    __DATASET_NAME = "datasetName"
    __URL = "datasetURI"

    def __init__(self, cluster_ip: str, api_path: str):
        """
        description: Prepares the helpers shared by every request of this
        client.

        cluster_ip: Cluster address; it prefixes the service URL and is also
        handed to the observer.
        api_path: Path of the dataset service, appended to cluster_ip.
        """
        self.__service_url = f'{cluster_ip}{api_path}'
        self.__response_treat = ResponseTreat()
        self.__cluster_ip = cluster_ip
        self.__entity_reader = EntityReader(self.__service_url)
        self.__observer = Observer(self.__cluster_ip)

    def insert_dataset_sync(self,
                            dataset_name: str,
                            url: str,
                            pretty_response: bool = False) -> Union[dict, str]:
        """
        description: This method is responsible to insert a dataset from a URI
        synchronously, i.e., the caller waits until the dataset is inserted into
        the Learning Orchestra storage mechanism.

        pretty_response: If true it returns a string, otherwise a dictionary.
        dataset_name: Is the name of the dataset file that will be created.
        url: Url to CSV file.

        return: A JSON object with an error or warning message or a URL
        indicating the correct operation.
        """
        request_body = {self.__DATASET_NAME: dataset_name, self.__URL: url}
        request_url = self.__service_url

        response = requests.post(url=request_url, json=request_body)
        # Block on the observer before the response is treated.
        self.__observer.wait(dataset_name)

        return self.__response_treat.treatment(response, pretty_response)

    def insert_dataset_async(self,
                             dataset_name: str,
                             url: str,
                             pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method is responsible to insert a dataset from a URI
        asynchronously, i.e., the caller does not wait until the dataset is
        inserted into the Learning Orchestra storage mechanism. Instead, the
        caller receives a JSON object with a URL to proceed future calls to
        verify if the dataset is inserted.

        pretty_response: If true it returns a string, otherwise a dictionary.
        dataset_name: Is the name of the dataset file that will be created.
        url: Url to CSV file.

        return: A JSON object with an error or warning message or a URL
        indicating the correct operation (the caller must use such an URL to
        proceed future checks to verify if the dataset is inserted - using wait
        method).
        """
        request_body = {self.__DATASET_NAME: dataset_name, self.__URL: url}
        request_url = self.__service_url

        response = requests.post(url=request_url, json=request_body)
        return self.__response_treat.treatment(response, pretty_response)

    def search_all_datasets(self, pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method retrieves all datasets metadata, i.e., it does
        not retrieve the dataset content.

        pretty_response: If true it returns a string, otherwise a dictionary.

        return: All datasets metadata stored in Learning Orchestra or an empty
        result.
        """
        response = self.__entity_reader.read_all_instances_from_entity()
        return self.__response_treat.treatment(response, pretty_response)

    def delete_dataset(self,
                       dataset_name: str,
                       pretty_response: bool = False) -> Union[dict, str]:
        """
        description: This method is responsible for deleting the dataset.
        This delete operation is asynchronous, so it does not lock the caller
        until the deletion finished. Instead, it returns a JSON object with a
        URL for a future use. The caller uses the URL for delete checks. If a
        dataset was used by another task (Ex. projection, histogram, pca, tune
        and so forth), it cannot be deleted.

        pretty_response: If true it returns a string, otherwise a dictionary.
        dataset_name: Represents the dataset name.

        return: JSON object with an error message, a warning message or a
        correct delete message
        """
        request_url = f'{self.__service_url}/{dataset_name}'
        response = requests.delete(request_url)

        return self.__response_treat.treatment(response, pretty_response)

    def search_dataset_content(self,
                               dataset_name: str,
                               query: Union[dict, None] = None,
                               limit: int = 10,
                               skip: int = 0,
                               pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method is responsible for retrieving the dataset
        content.

        pretty_response: If true it returns a string, otherwise a dictionary.
        dataset_name: Is the name of the dataset file.
        query: Query to make in MongoDB (default: None, which is sent as an
        empty query).
        limit: Number of rows to return in pagination (default: 10) (maximum
        is set at 20 rows per request).
        skip: Number of rows to skip in pagination (default: 0).

        return: A page with some tuples or registers inside or an error if there
        is no such dataset. The current page is also returned to be used in
        future content requests.
        """
        # A fresh dict per call: a literal {} default would be one object
        # shared by every call and could leak mutations between requests.
        if query is None:
            query = {}

        response = self.__entity_reader.read_entity_content(
            dataset_name, query, limit, skip)

        return self.__response_treat.treatment(response, pretty_response)

    def wait(self, dataset_name: str, timeout: Union[int, None] = None) \
            -> dict:
        """
           description: This method is responsible to create a synchronization
           barrier for the insert_dataset_async method.

           dataset_name: Represents the dataset name.
           timeout: Represents the time in seconds to wait for a dataset
           download to finish its run (default: None).

           return: JSON object with an error message, a warning message or a
           correct result of the dataset insertion
        """
        return self.__observer.wait(dataset_name, timeout)
示例#11
0
class FunctionPython:
    __CODE_FIELD = "function"
    __PARAMETERS_FIELD = "functionParameters"
    __NAME_FIELD = "name"
    __DESCRIPTION_FIELD = "description"

    def __init__(self, cluster_ip: str):
        """
        description: Prepares the helpers shared by every request of this
        client, targeting the python function service path.

        cluster_ip: Cluster address; it prefixes the service URL and is also
        handed to the observer.
        """
        self.__api_path = "/api/learningOrchestra/v1/function/python"
        self.__service_url = f'{cluster_ip}{self.__api_path}'
        self.__response_treat = ResponseTreat()
        self.__cluster_ip = cluster_ip
        self.__entity_reader = EntityReader(self.__service_url)
        self.__observer = Observer(self.__cluster_ip)

    def run_function_sync(self,
                          name: str,
                          parameters: dict,
                          code: str,
                          description: str = "",
                          pretty_response: bool = False) -> Union[dict, str]:
        """
        description: This method runs a python 3 code in sync mode, so it
        represents a wildcard for the data scientist. It can be used when
        train, predict, tune, explore or any other pipe must be customized. The
        function is also useful for new pipes.

        pretty_response: If true it returns a string, otherwise a dictionary.
        name: Is the name of the object stored in Learning Orchestra storage
        system (volume or mongoDB).
        parameters: The parameters of the function being called.
        code: The Python code.
        description: Free text sent along with the request (default: empty).

        return: A JSON object with an error or warning message or the correct
        operation result.
        """
        request_body = {
            self.__NAME_FIELD: name,
            self.__PARAMETERS_FIELD: parameters,
            self.__CODE_FIELD: code,
            self.__DESCRIPTION_FIELD: description
        }

        request_url = self.__service_url
        response = requests.post(url=request_url, json=request_body)
        # Block on the observer before the response is treated.
        self.__observer.wait(name)

        return self.__response_treat.treatment(response, pretty_response)

    def run_function_async(self,
                           name: str,
                           parameters: dict,
                           code: str,
                           description: str = "",
                           pretty_response: bool = False) -> Union[dict, str]:
        """
        description: This method runs a python 3 code in async mode, so it
        represents a wildcard for the data scientist. It does not lock the
        caller, so a wait method must be used. It can be used when train,
        predict, tune, explore or any other pipe must be customized. The
        function is also useful for new pipes.

        pretty_response: If true it returns a string, otherwise a dictionary.
        name: Is the name of the function to be called.
        parameters: The parameters of the function being called.
        code: The Python code.
        description: Free text sent along with the request (default: empty).

        return: A JSON object with an error or warning message or the correct
        operation result.
        """
        request_body = {
            self.__NAME_FIELD: name,
            self.__PARAMETERS_FIELD: parameters,
            self.__CODE_FIELD: code,
            self.__DESCRIPTION_FIELD: description
        }

        request_url = self.__service_url

        response = requests.post(url=request_url, json=request_body)
        return self.__response_treat.treatment(response, pretty_response)

    def search_all_executions(self, pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method retrieves all created functions metadata,
        i.e., it does not retrieve the function result content.

        pretty_response: If true it returns a string, otherwise a dictionary.

        return: All function executions metadata stored in Learning Orchestra
        or an empty result.
        """
        response = self.__entity_reader.read_all_instances_from_entity()
        return self.__response_treat.treatment(response, pretty_response)

    def delete_execution(self, name: str, pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method is responsible for deleting the function.
        This delete operation is asynchronous, so it does not lock the caller
        until the deletion finished. Instead, it returns a JSON object with a
        URL for a future use. The caller uses the URL for delete checks.

        pretty_response: If true it returns a string, otherwise a dictionary.
        name: Represents the function name.

        return: JSON object with an error message, a warning message or a
        correct delete message
        """
        request_url = f'{self.__service_url}/{name}'

        response = requests.delete(request_url)
        return self.__response_treat.treatment(response, pretty_response)

    def search_execution_content(self,
                                 name: str,
                                 query: Union[dict, None] = None,
                                 limit: int = 10,
                                 skip: int = 0,
                                 pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method is responsible for retrieving the function
        results, including metadata. A function is executed many times, using
        different parameters, thus many results are stored in Learning
        Orchestra.

        pretty_response: If true it returns a string, otherwise a dictionary.
        name: Is the name of the function.
        query: Query to make in MongoDB (default: None, which is sent as an
        empty query).
        limit: Number of rows to return in pagination (default: 10) (maximum
        is set at 20 rows per request).
        skip: Number of rows to skip in pagination (default: 0).

        return: A page with some function results inside or an error if there
        is no such function. The current page is also returned to be used in
        future content requests.
        """
        # A fresh dict per call: a literal {} default would be one object
        # shared by every call and could leak mutations between requests.
        if query is None:
            query = {}

        response = self.__entity_reader.read_entity_content(
            name, query, limit, skip)

        return self.__response_treat.treatment(response, pretty_response)

    def wait(self, dataset_name: str, timeout: Union[int, None] = None) \
            -> dict:
        """
           description: This method is responsible to create a synchronization
           barrier for the run_function_async method or delete_execution
           method.

           dataset_name: Represents the function name (the parameter keeps
           its historical name so keyword callers keep working).
           timeout: Represents the time in seconds to wait for a function to
           finish its run (default: None).

           return: JSON object with an error message, a warning message or a
           correct function result
        """
        return self.__observer.wait(dataset_name, timeout)
示例#12
0
class ExploreHistogram:
    __INPUT_NAME = "inputDatasetName"
    __OUTPUT_NAME = "outputDatasetName"
    __FIELDS = "names"

    def __init__(self, cluster_ip: str):
        """
        description: Prepares the helpers shared by every request of this
        client, targeting the histogram service path.

        cluster_ip: Cluster address; it prefixes the service URL and is also
        handed to the observer.
        """
        self.__cluster_ip = cluster_ip
        self.__api_path = "/api/learningOrchestra/v1/explore/histogram"
        self.__service_url = f'{cluster_ip}{self.__api_path}'
        self.__response_treat = ResponseTreat()
        self.__observer = Observer(self.__cluster_ip)
        self.__entity_reader = EntityReader(self.__service_url)

    def run_histogram_sync(self,
                           dataset_name: str,
                           histogram_name: str,
                           fields: list,
                           pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method creates a histogram
        synchronously, so the caller waits until the histogram is inserted into
        the Learning Orchestra storage mechanism.

        dataset_name: Represents the name of dataset.
        histogram_name: Represents the name of histogram.
        fields: Represents a list of attributes.
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: A JSON object with error or warning messages. In case of
        success, it returns a histogram.
        """

        request_body = {
            self.__INPUT_NAME: dataset_name,
            self.__OUTPUT_NAME: histogram_name,
            self.__FIELDS: fields,
        }
        request_url = self.__service_url

        response = requests.post(url=request_url, json=request_body)
        # Block on the histogram being produced (the request's output), not
        # on the input dataset: waiting on the input does not synchronize
        # with the work that was just requested.
        self.__observer.wait(histogram_name)

        return self.__response_treat.treatment(response, pretty_response)

    def run_histogram_async(self,
                            dataset_name: str,
                            histogram_name: str,
                            fields: list,
                            pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method creates a histogram
        asynchronously, so the caller does not wait until the histogram is
        inserted into the Learning Orchestra storage mechanism. Use the wait
        method with the histogram name to synchronize afterwards.

        dataset_name: Represents the name of dataset.
        histogram_name: Represents the name of histogram.
        fields: Represents a list of attributes.
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: A JSON object with error or warning messages. In case of
        success, it returns a histogram.
        """

        request_body = {
            self.__INPUT_NAME: dataset_name,
            self.__OUTPUT_NAME: histogram_name,
            self.__FIELDS: fields,
        }
        request_url = self.__service_url

        response = requests.post(url=request_url, json=request_body)

        return self.__response_treat.treatment(response, pretty_response)

    def search_all_histograms(self, pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method retrieves all histogram metadata, it does not
        retrieve the histogram content.

        pretty_response: If true it returns a string, otherwise a dictionary.

        return: A list with all histogram metadata stored in Learning Orchestra
        or an empty result.
        """

        response = self.__entity_reader.read_all_instances_from_entity()
        return self.__response_treat.treatment(response, pretty_response)

    def search_histogram_content(self,
                                 histogram_name: str,
                                 query: Union[dict, None] = None,
                                 limit: int = 10,
                                 skip: int = 0,
                                 pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method is responsible for retrieving the histogram
        content.

        pretty_response: If true it returns a string, otherwise a dictionary.
        histogram_name: Represents the histogram name.
        query: Query to make in MongoDB (default: None, which is sent as an
        empty query).
        limit: Number of rows to return in pagination (default: 10) (maximum
        is set at 20 rows per request).
        skip: Number of rows to skip in pagination (default: 0).

        return: A page with some tuples or registers inside or an error if there
        is no such histogram. The current page is also returned to be used in
        future content requests.
        """
        # A fresh dict per call: a literal {} default would be one object
        # shared by every call and could leak mutations between requests.
        if query is None:
            query = {}

        response = self.__entity_reader.read_entity_content(
            histogram_name, query, limit, skip)

        return self.__response_treat.treatment(response, pretty_response)

    def delete_histogram(self,
                         histogram_name: str,
                         pretty_response: bool = False) -> Union[dict, str]:
        """
        description: This method is responsible for deleting a histogram.
        The delete operation is always asynchronous,
        since the deletion is performed in background.

        pretty_response: If true it returns a string, otherwise a dictionary.
        histogram_name: Represents the histogram name.

        return: JSON object with an error message, a warning message or a
        correct delete message
        """

        cluster_url_histogram = f'{self.__service_url}/{histogram_name}'
        response = requests.delete(cluster_url_histogram)

        return self.__response_treat.treatment(response, pretty_response)

    def wait(self, name: str, timeout: Union[int, None] = None) -> dict:
        """
           description: This method is responsible to create a synchronization
           barrier for the run_histogram_async method or delete_histogram
           method.

           name: Represents the histogram name.
           timeout: Represents the time in seconds to wait for a histogram to
           finish its run (default: None).

           return: JSON object with an error message, a warning message or a
           correct histogram result
        """
        return self.__observer.wait(name, timeout)