class TransformProjection:
    __INPUT_NAME = "inputDatasetName"
    __OUTPUT_NAME = "outputDatasetName"
    __FIELDS = "names"

    def __init__(self, cluster_ip: str):
        self.__api_path = "/api/learningOrchestra/v1/transform/projection"
        self.__service_url = f'{cluster_ip}{self.__api_path}'
        self.__response_treat = ResponseTreat()
        self.__cluster_ip = cluster_ip
        self.__entity_reader = EntityReader(self.__service_url)
        self.__observer = Observer(self.__cluster_ip)

    def remove_dataset_attributes_sync(self, dataset_name: str,
                                       projection_name: str,
                                       fields: list,
                                       pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method removes a set of attributes from a dataset
        synchronously; the caller waits until the projection is inserted into
        the Learning Orchestra storage mechanism.

        dataset_name: Represents the dataset name.
        projection_name: Represents the projection name.
        fields: Represents the set of attributes to be removed, given as a
        list of attribute names.
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: A JSON object with error or warning messages. In case of
        success, it returns the projection metadata.
        """
        request_body = {
            self.__INPUT_NAME: dataset_name,
            self.__OUTPUT_NAME: projection_name,
            self.__FIELDS: fields,
        }
        request_url = self.__service_url
        response = requests.post(url=request_url, json=request_body)
        self.__observer.wait(dataset_name)

        return self.__response_treat.treatment(response, pretty_response)

    def remove_dataset_attributes_async(self, dataset_name: str,
                                        projection_name: str,
                                        fields: list,
                                        pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method removes a set of attributes from a dataset
        asynchronously; the caller does not wait until the projection is
        inserted into the Learning Orchestra storage mechanism. A wait method
        call must occur to guarantee a synchronization barrier.

        dataset_name: Represents the dataset name.
        projection_name: Represents the projection name.
        fields: Represents the set of attributes to be removed, given as a
        list of attribute names.
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: A JSON object with error or warning messages. In case of
        success, it returns the projection URL to be obtained later with a
        wait method call.
        """
        request_body = {
            self.__INPUT_NAME: dataset_name,
            self.__OUTPUT_NAME: projection_name,
            self.__FIELDS: fields,
        }
        request_url = self.__service_url
        response = requests.post(url=request_url, json=request_body)

        return self.__response_treat.treatment(response, pretty_response)

    def search_all_projections(self, pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method retrieves all projection metadata, i.e., it
        does not retrieve the projection content.

        pretty_response: If true it returns a string, otherwise a dictionary.

        return: A list with all projection metadata stored in Learning
        Orchestra or an empty result.
        """
        response = self.__entity_reader.read_all_instances_from_entity()
        return self.__response_treat.treatment(response, pretty_response)

    def search_projection_content(self, projection_name: str,
                                  query: dict = {},
                                  limit: int = 10,
                                  skip: int = 0,
                                  pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method is responsible for retrieving the projection
        content.

        projection_name: Represents the projection name.
        query: Query to run in MongoDB (default: empty query).
        limit: Number of rows to return in pagination (default: 10; the
        maximum is 20 rows per request).
        skip: Number of rows to skip in pagination (default: 0).
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: A page with some tuples or registers inside or an error if
        there is no such projection. The current page is also returned to be
        used in future content requests.
        """
        response = self.__entity_reader.read_entity_content(
            projection_name, query, limit, skip)
        return self.__response_treat.treatment(response, pretty_response)

    def delete_projection(self, projection_name: str,
                          pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method is responsible for deleting a projection.
        The delete operation is always asynchronous and performed in the
        background.

        projection_name: Represents the projection name.
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: JSON object with an error message, a warning message or a
        correct delete message.
        """
        cluster_url_projection = f'{self.__service_url}/{projection_name}'
        response = requests.delete(cluster_url_projection)
        response.raise_for_status()
        return self.__response_treat.treatment(response, pretty_response)

    def wait(self, projection_name: str, timeout: int = None) -> dict:
        """
        description: This method creates a synchronization barrier for the
        remove_dataset_attributes_async and delete_projection methods.

        projection_name: Represents the projection name.
        timeout: Represents the time in seconds to wait for a projection to
        finish its run.

        return: JSON object with an error message, a warning message or a
        correct projection result.
        """
        return self.__observer.wait(projection_name, timeout)
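# Minimal usage sketch for TransformProjection, assuming a running Learning
# Orchestra cluster; the cluster address, dataset name, projection name and
# field list below are hypothetical placeholders, not values from this file.
def _example_transform_projection_usage():
    projection = TransformProjection(cluster_ip="http://cluster-ip")
    # Synchronous call: blocks until the projection is stored.
    result = projection.remove_dataset_attributes_sync(
        dataset_name="titanic",
        projection_name="titanic_projection",
        fields=["Cabin", "Ticket"],
        pretty_response=True)
    print(result)
    # Asynchronous variant: returns immediately, so wait() is the barrier.
    projection.remove_dataset_attributes_async(
        dataset_name="titanic",
        projection_name="titanic_projection_async",
        fields=["Cabin"])
    projection.wait("titanic_projection_async")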
class Transform:
    __PARENT_NAME_FIELD = "parentName"
    __MODEL_NAME_FIELD = "modelName"
    __METHOD_NAME_FIELD = "method"
    __CLASS_PARAMETERS_FIELD = "methodParameters"
    __NAME_FIELD = "name"
    __DESCRIPTION_FIELD = "description"

    def __init__(self, cluster_ip: str, api_path: str):
        self.__service_url = f'{cluster_ip}{api_path}'
        self.__response_treat = ResponseTreat()
        self.__cluster_ip = cluster_ip
        self.__entity_reader = EntityReader(self.__service_url)
        self.__observer = Observer(self.__cluster_ip)

    def create_transform_sync(self, name: str, model_name: str,
                              parent_name: str, method_name: str,
                              parameters: dict, description: str = "",
                              pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method transforms a dataset in sync mode.

        name: Is the name of the transform output object that will be
        created.
        model_name: Is the name of the model used by this transform step.
        parent_name: Is the name of the previous ML step of the pipeline.
        method_name: Is the name of the method to be executed (the ML tool
        way to transform datasets).
        parameters: Is the set of parameters used by the method.
        description: Is a textual description of the transform step.
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: A JSON object with an error or warning message or a URL
        indicating the correct operation.
        """
        request_body = {
            self.__NAME_FIELD: name,
            self.__MODEL_NAME_FIELD: model_name,
            self.__PARENT_NAME_FIELD: parent_name,
            self.__METHOD_NAME_FIELD: method_name,
            self.__CLASS_PARAMETERS_FIELD: parameters,
            self.__DESCRIPTION_FIELD: description
        }
        request_url = self.__service_url
        response = requests.post(url=request_url, json=request_body)
        self.__observer.wait(name)

        return self.__response_treat.treatment(response, pretty_response)

    def create_transform_async(self, name: str, model_name: str,
                               parent_name: str, method_name: str,
                               parameters: dict, description: str = "",
                               pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method transforms a dataset in async mode. The wait
        method must be called to guarantee a synchronization barrier.

        name: Is the name of the transform output object that will be
        created.
        model_name: Is the name of the model used by this transform step.
        parent_name: Is the name of the previous ML step of the pipeline.
        method_name: Is the name of the method to be executed (the ML tool
        way to transform datasets).
        parameters: Is the set of parameters used by the method.
        description: Is a textual description of the transform step.
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: A JSON object with an error or warning message or a URL
        indicating the correct operation.
        """
        request_body = {
            self.__NAME_FIELD: name,
            self.__MODEL_NAME_FIELD: model_name,
            self.__PARENT_NAME_FIELD: parent_name,
            self.__METHOD_NAME_FIELD: method_name,
            self.__CLASS_PARAMETERS_FIELD: parameters,
            self.__DESCRIPTION_FIELD: description
        }
        request_url = self.__service_url
        response = requests.post(url=request_url, json=request_body)

        return self.__response_treat.treatment(response, pretty_response)

    def search_all_transformations(self, pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method retrieves all transform metadata, i.e., it
        does not retrieve the transform content.

        pretty_response: If true it returns a string, otherwise a dictionary.

        return: All transform metadata stored in Learning Orchestra or an
        empty result.
        """
        response = self.__entity_reader.read_all_instances_from_entity()
        return self.__response_treat.treatment(response, pretty_response)

    def delete_transform(self, name: str, pretty_response=False) \
            -> Union[dict, str]:
        """
        description: This method is responsible for deleting a transform
        step. The delete operation is asynchronous, so it does not lock the
        caller until the deletion finishes. Instead, it returns a JSON object
        with a URL for future use. The caller uses the URL for delete checks.

        name: Represents the transform name.
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: JSON object with an error message, a warning message or a
        correct delete message.
        """
        request_url = f'{self.__service_url}/{name}'
        response = requests.delete(request_url)
        return self.__response_treat.treatment(response, pretty_response)

    def search_transform_content(self, name: str, query: dict = {},
                                 limit: int = 10, skip: int = 0,
                                 pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method is responsible for retrieving a transform
        URL, which is useful to obtain the transform plottable content, as
        well as the metadata content.

        name: Is the name of the transform object.
        query: Query to run in MongoDB (default: empty query).
        limit: Number of rows to return in pagination (default: 10; the
        maximum is 20 rows per request).
        skip: Number of rows to skip in pagination (default: 0).
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: A page with transform content and metadata inside or an error
        if there is no such transform object. The current page is also
        returned to be used in future content requests.
        """
        response = self.__entity_reader.read_entity_content(
            name, query, limit, skip)
        return self.__response_treat.treatment(response, pretty_response)

    def wait(self, name: str, timeout: int = None) -> dict:
        """
        description: This method creates a synchronization barrier for the
        create_transform_async and delete_transform methods.

        name: Represents the transform name.
        timeout: Represents the time in seconds to wait for a transform step
        to finish its run.

        return: JSON object with an error message, a warning message or a
        correct transform result.
        """
        return self.__observer.wait(name, timeout)
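# Minimal usage sketch for the generic Transform pipe. The cluster address,
# the api_path value and all names and parameters below are hypothetical
# placeholders; the real api_path depends on the ML tool exposed by the
# deployed Learning Orchestra cluster.
def _example_transform_usage():
    transform = Transform(
        cluster_ip="http://cluster-ip",
        api_path="/api/learningOrchestra/v1/transform/example")
    transform.create_transform_sync(
        name="titanic_scaled",
        model_name="scaler_model",
        parent_name="titanic",
        method_name="transform",
        parameters={},
        description="scales the numeric attributes")
    print(transform.search_all_transformations(pretty_response=True))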
class TransformDataType:
    __INPUT_NAME = "inputDatasetName"
    __TYPES = "types"

    def __init__(self, cluster_ip: str):
        self.__api_path = "/api/learningOrchestra/v1/transform/dataType"
        self.__service_url = f'{cluster_ip}{self.__api_path}'
        self.__response_treat = ResponseTreat()
        self.__cluster_ip = cluster_ip
        self.__entity_reader = EntityReader(self.__service_url)
        self.__observer = Observer(self.__cluster_ip)

    def update_dataset_type_sync(self, dataset_name: str, types: dict,
                                 pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: Change dataset field types (from number to string and
        vice-versa). Many type modifications can be performed in one method
        call. This is a synchronous call, so the caller waits until the
        datatype step finishes.

        dataset_name: Represents the dataset name.
        types: Represents a map, where each key:value pair is a field:type.
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: A JSON object with error or warning messages or a correct
        datatype result.
        """
        url_request = self.__service_url
        body_request = {self.__INPUT_NAME: dataset_name, self.__TYPES: types}
        response = requests.patch(url=url_request, json=body_request)
        self.__observer.wait(dataset_name)

        return self.__response_treat.treatment(response, pretty_response)

    def update_dataset_type_async(self, dataset_name: str, types: dict,
                                  pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: Change dataset field types (from number to string and
        vice-versa). Many type modifications can be performed in one method
        call. This is an asynchronous call, thus a wait method call must also
        occur to guarantee a synchronization barrier.

        dataset_name: Represents the dataset name.
        types: Represents a map, where each key:value pair is a field:type.
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: A JSON object with error or warning messages or a correct
        datatype result.
        """
        url_request = self.__service_url
        body_request = {self.__INPUT_NAME: dataset_name, self.__TYPES: types}
        response = requests.patch(url=url_request, json=body_request)

        return self.__response_treat.treatment(response, pretty_response)

    def search_all_datatype(self, pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method retrieves all datatype metadata, i.e., it
        does not retrieve the datatype content.

        pretty_response: If true it returns a string, otherwise a dictionary.

        return: All datatype metadata stored in Learning Orchestra or an
        empty result.
        """
        response = self.__entity_reader.read_all_instances_from_entity()
        return self.__response_treat.treatment(response, pretty_response)

    def delete_datatype(self, name: str, pretty_response=False) \
            -> Union[dict, str]:
        """
        description: This method is responsible for deleting the datatype
        step. The delete operation is asynchronous, so it does not lock the
        caller until the deletion finishes. Instead, it returns a JSON object
        with a URL for future use. The caller uses the URL for delete checks.

        name: Represents the datatype name.
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: JSON object with an error message, a warning message or a
        correct delete message.
        """
        request_url = f'{self.__service_url}/{name}'
        response = requests.delete(request_url)
        return self.__response_treat.treatment(response, pretty_response)

    def search_datatype_content(self, name: str, query: dict = {},
                                limit: int = 10, skip: int = 0,
                                pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method is responsible for retrieving all the
        datatype tuples or registers, as well as the metadata content.

        name: Is the name of the datatype object.
        query: Query to run in MongoDB (default: empty query).
        limit: Number of rows to return in pagination (default: 10; the
        maximum is 20 rows per request).
        skip: Number of rows to skip in pagination (default: 0).
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: A page with some registers or tuples inside or an error if
        there is no such datatype object. The current page is also returned
        to be used in future content requests.
        """
        response = self.__entity_reader.read_entity_content(
            name, query, limit, skip)
        return self.__response_treat.treatment(response, pretty_response)

    def wait(self, dataset_name: str, timeout: int = None) -> dict:
        """
        description: This method creates a synchronization barrier for the
        update_dataset_type_async and delete_datatype methods.

        dataset_name: Represents the dataset name used in the datatype step.
        timeout: Represents the time in seconds to wait for a datatype step
        to finish its run.

        return: JSON object with an error message, a warning message or a
        correct datatype result.
        """
        return self.__observer.wait(dataset_name, timeout)
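# Minimal usage sketch for TransformDataType, assuming a running Learning
# Orchestra cluster; the cluster address, dataset name and field/type map
# below are hypothetical placeholders.
def _example_transform_datatype_usage():
    datatype = TransformDataType(cluster_ip="http://cluster-ip")
    # Convert two fields of a previously inserted dataset to numbers.
    datatype.update_dataset_type_sync(
        dataset_name="titanic",
        types={"Age": "number", "Fare": "number"})
    # The async variant returns immediately; wait() is the barrier.
    datatype.update_dataset_type_async(
        dataset_name="titanic",
        types={"Survived": "string"})
    datatype.wait("titanic")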
class Explore:
    __PARENT_NAME_FIELD = "parentName"
    __MODEL_NAME_FIELD = "modelName"
    __METHOD_NAME_FIELD = "method"
    __CLASS_PARAMETERS_FIELD = "methodParameters"
    __NAME_FIELD = "name"
    __DESCRIPTION_FIELD = "description"

    def __init__(self, cluster_ip: str, api_path: str):
        self.__service_url = f'{cluster_ip}{api_path}'
        self.__response_treat = ResponseTreat()
        self.__cluster_ip = cluster_ip
        self.__entity_reader = EntityReader(self.__service_url)
        self.__observer = Observer(self.__cluster_ip)

    def create_explore_sync(self, name: str, model_name: str,
                            parent_name: str, method_name: str,
                            parameters: dict, description: str = "",
                            pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method runs an explore service on a model in sync
        mode.

        name: Is the name of the explore result that will be created.
        model_name: Is the name of the model that will be explored.
        parent_name: The name of the previous pipe in the pipeline.
        method_name: The name of the ML tool method used to explore a model.
        parameters: The set of parameters of the ML method defined
        previously.
        description: Is a textual description of the explore step.
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: A JSON object with an error or warning message or a URL
        indicating the correct operation.
        """
        request_body = {
            self.__NAME_FIELD: name,
            self.__MODEL_NAME_FIELD: model_name,
            self.__PARENT_NAME_FIELD: parent_name,
            self.__METHOD_NAME_FIELD: method_name,
            self.__CLASS_PARAMETERS_FIELD: parameters,
            self.__DESCRIPTION_FIELD: description}
        request_url = self.__service_url
        response = requests.post(url=request_url, json=request_body)
        self.__observer.wait(name)

        return self.__response_treat.treatment(response, pretty_response)

    def create_explore_async(self, name: str, model_name: str,
                             parent_name: str, method_name: str,
                             parameters: dict, description: str = "",
                             pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method runs an explore service on a model in async
        mode, so a wait method call must occur to guarantee a synchronization
        barrier.

        name: Is the name of the explore result that will be created.
        model_name: Is the name of the model that will be explored.
        parent_name: The name of the previous pipe in the pipeline.
        method_name: The name of the ML tool method used to explore a model.
        parameters: The set of parameters of the ML method defined
        previously.
        description: Is a textual description of the explore step.
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: A JSON object with an error or warning message or a URL
        indicating the correct operation.
        """
        request_body = {
            self.__NAME_FIELD: name,
            self.__MODEL_NAME_FIELD: model_name,
            self.__PARENT_NAME_FIELD: parent_name,
            self.__METHOD_NAME_FIELD: method_name,
            self.__CLASS_PARAMETERS_FIELD: parameters,
            self.__DESCRIPTION_FIELD: description}
        request_url = self.__service_url
        response = requests.post(url=request_url, json=request_body)

        return self.__response_treat.treatment(response, pretty_response)

    def search_all_explores(self, pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method retrieves all created explorations, i.e., it
        does not retrieve the specific explore content.

        pretty_response: If true it returns a string, otherwise a dictionary.

        return: All explore metadata stored in Learning Orchestra or an empty
        result.
        """
        response = self.__entity_reader.read_all_instances_from_entity()
        return self.__response_treat.treatment(response, pretty_response)

    def delete_explore(self, name: str, pretty_response=False) \
            -> Union[dict, str]:
        """
        description: This method is responsible for deleting an explore
        result. The delete operation is asynchronous, so it does not lock the
        caller until the deletion finishes. Instead, it returns a JSON object
        with a URL for future use. The caller uses the wait method for delete
        checks. If a dataset was used by another task (e.g. projection,
        histogram, tune and so forth), it cannot be deleted.

        name: Represents the explore name.
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: JSON object with an error message, a warning message or a
        correct delete message.
        """
        request_url = f'{self.__service_url}/{name}'
        response = requests.delete(request_url)
        return self.__response_treat.treatment(response, pretty_response)

    def search_explore_image(self, name: str, pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method is responsible for retrieving the explore
        image to be plotted.

        name: Is the name of the explore instance.
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: A URL with a link to an image or an error if there is no such
        result.
        """
        response = self.__entity_reader.read_entity_content(name)
        return self.__response_treat.treatment(response, pretty_response)

    def search_explore_metadata(self, name: str,
                                pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method is responsible for retrieving the explore
        image metadata.

        name: Is the name of the explore instance.
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: A page with some metadata inside or an error if there is no
        such explore instance. The current page is also returned to be used
        in future content requests.
        """
        response = self.__entity_reader.read_explore_image_metadata(name)
        return self.__response_treat.treatment(response, pretty_response)

    def wait(self, name: str, timeout: int = None) -> dict:
        """
        description: This method creates a synchronization barrier for the
        create_explore_async and delete_explore methods.

        name: Represents the explore name.
        timeout: Represents the time in seconds to wait for an explore to
        finish its run.

        return: JSON object with an error message, a warning message or a
        correct explore result (the image URL as an explore result).
        """
        return self.__observer.wait(name, timeout)
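# Minimal usage sketch for the generic Explore pipe. The cluster address,
# api_path, names, method and parameters below are hypothetical placeholders;
# the real api_path and method names depend on the ML tool exposed by the
# deployed Learning Orchestra cluster.
def _example_explore_usage():
    explore = Explore(
        cluster_ip="http://cluster-ip",
        api_path="/api/learningOrchestra/v1/explore/example")
    explore.create_explore_async(
        name="titanic_model_exploration",
        model_name="titanic_model",
        parent_name="titanic_model",
        method_name="plot",
        parameters={},
        description="plots the fitted model")
    # wait() is the synchronization barrier; the explore result holds the
    # image URL.
    explore.wait("titanic_model_exploration")
    print(explore.search_explore_image("titanic_model_exploration"))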
class BuilderSparkMl:
    __TRAIN_FIELD = "trainDatasetName"
    __TEST_FIELD = "testDatasetName"
    __CODE_FIELD = "modelingCode"
    __CLASSIFIERS_LIST_FIELD = "classifiersList"

    def __init__(self, cluster_ip: str):
        self.__api_path = "/api/learningOrchestra/v1/builder/sparkml"
        self.__service_url = f'{cluster_ip}{self.__api_path}'
        self.__response_treat = ResponseTreat()
        self.__cluster_ip = cluster_ip
        self.__entity_reader = EntityReader(self.__service_url)
        self.__observer = Observer(self.__cluster_ip)

    def run_spark_ml_sync(self, train_dataset_name: str,
                          test_dataset_name: str,
                          modeling_code: str,
                          model_classifiers: list,
                          pretty_response: bool = False) -> Union[dict, str]:
        """
        description: This method runs several steps of a machine learning
        pipeline (transform, tune, train and evaluate, for instance) using a
        modeling code and several classifiers. It represents a way to run an
        entire pipeline. The caller waits until the method execution ends,
        since it is a synchronous method.

        train_dataset_name: Represents the final train dataset.
        test_dataset_name: Represents the final test dataset.
        modeling_code: Represents the Python 3 code of the pyspark
        pre-processing model.
        model_classifiers: List of initial classifiers to be used in the
        model.
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: The set of predictions (URIs of them).
        """
        request_body_content = {
            self.__TRAIN_FIELD: train_dataset_name,
            self.__TEST_FIELD: test_dataset_name,
            self.__CODE_FIELD: modeling_code,
            self.__CLASSIFIERS_LIST_FIELD: model_classifiers,
        }
        response = requests.post(url=self.__service_url,
                                 json=request_body_content)
        for classifier in model_classifiers:
            self.__observer.wait(f'{test_dataset_name}{classifier}')

        return self.__response_treat.treatment(response, pretty_response)

    def run_spark_ml_async(self, train_dataset_name: str,
                           test_dataset_name: str,
                           modeling_code: str,
                           model_classifiers: list,
                           pretty_response: bool = False) -> Union[dict, str]:
        """
        description: This method runs several steps of a machine learning
        pipeline (transform, tune, train and evaluate, for instance) using a
        modeling code and several classifiers. It represents a way to run an
        entire pipeline. The caller does not wait until the method execution
        ends, since it is an asynchronous method.

        train_dataset_name: Represents the final train dataset.
        test_dataset_name: Represents the final test dataset.
        modeling_code: Represents the Python 3 code of the pyspark
        pre-processing model.
        model_classifiers: List of initial classifiers to be used in the
        model.
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: The URL to retrieve the Spark pipeline result.
        """
        request_body_content = {
            self.__TRAIN_FIELD: train_dataset_name,
            self.__TEST_FIELD: test_dataset_name,
            self.__CODE_FIELD: modeling_code,
            self.__CLASSIFIERS_LIST_FIELD: model_classifiers,
        }
        response = requests.post(url=self.__service_url,
                                 json=request_body_content)

        return self.__response_treat.treatment(response, pretty_response)

    def search_all_builders(self, pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method retrieves all model predictions metadata. It
        does not retrieve the model predictions content.

        pretty_response: If true it returns a string, otherwise a dictionary.

        return: A list with all model predictions metadata stored in Learning
        Orchestra or an empty result.
        """
        response = self.__entity_reader.read_all_instances_from_entity()
        return self.__response_treat.treatment(response, pretty_response)

    def search_builder_register_predictions(self, builder_name: str,
                                            query: dict = {},
                                            limit: int = 10,
                                            skip: int = 0,
                                            pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method is responsible for retrieving the model
        predictions content.

        builder_name: Represents the model predictions name.
        query: Query to run in MongoDB (default: empty query).
        limit: Number of rows to return in pagination (default: 10; the
        maximum is 20 rows per request).
        skip: Number of rows to skip in pagination (default: 0).
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: A page with some tuples or registers inside or an error if
        the pipeline ran incorrectly. The current page is also returned to be
        used in future content requests.
        """
        response = self.__entity_reader.read_entity_content(
            builder_name, query, limit, skip)
        return self.__response_treat.treatment(response, pretty_response)

    def search_builder(self, builder_name: str,
                       pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method is responsible for retrieving a specific
        model prediction metadata.

        builder_name: Represents the model predictions name.
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: A specific model prediction metadata stored in Learning
        Orchestra or an error if there is no such prediction.
        """
        response = self.search_builder_register_predictions(
            builder_name, limit=1, pretty_response=pretty_response)
        return response

    def delete_builder(self, builder_name: str,
                       pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method is responsible for deleting a model
        prediction. The delete operation is always asynchronous, since the
        deletion is performed in the background.

        builder_name: Represents the pipeline name.
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: JSON object with an error message, a warning message or a
        correct delete message.
        """
        cluster_url_dataset = f'{self.__service_url}/{builder_name}'
        response = requests.delete(cluster_url_dataset)
        return self.__response_treat.treatment(response, pretty_response)

    def wait(self, dataset_name: str, timeout: int = None) -> dict:
        """
        description: This method creates a synchronization barrier for the
        run_spark_ml_async method.

        dataset_name: Represents the pipeline name.
        timeout: Represents the time in seconds to wait for a builder to
        finish its run.

        return: JSON object with an error message, a warning message or a
        correct execution of a pipeline.
        """
        return self.__observer.wait(dataset_name, timeout)
class Model:
    __CLASS_FIELD = "class"
    __MODULE_PATH_FIELD = "modulePath"
    __CLASS_PARAMETERS_FIELD = "classParameters"
    __NAME_FIELD = "modelName"
    __DESCRIPTION_FIELD = "description"

    def __init__(self, cluster_ip: str, api_path: str):
        self.__service_url = f'{cluster_ip}{api_path}'
        self.__response_treat = ResponseTreat()
        self.__cluster_ip = cluster_ip
        self.__entity_reader = EntityReader(self.__service_url)
        self.__observer = Observer(self.__cluster_ip)

    def create_model_sync(self, name: str, module_path: str, class_name: str,
                          class_parameters: dict, description: str = "",
                          pretty_response: bool = False) -> Union[dict, str]:
        """
        description: This method runs a model creation in sync mode.

        name: Is the name of the model that will be created.
        module_path: The name of the package of the ML tool used (e.g.
        Scikit-learn or TensorFlow).
        class_name: Is the name of the class to be executed.
        class_parameters: The set of parameters of the ML class defined
        previously.
        description: Is a textual description of the model.
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: A JSON object with an error or warning message or a URL
        indicating the correct operation.
        """
        request_body = {
            self.__NAME_FIELD: name,
            self.__CLASS_FIELD: class_name,
            self.__MODULE_PATH_FIELD: module_path,
            self.__CLASS_PARAMETERS_FIELD: class_parameters,
            self.__DESCRIPTION_FIELD: description
        }
        request_url = self.__service_url
        response = requests.post(url=request_url, json=request_body)
        self.__observer.wait(name)

        return self.__response_treat.treatment(response, pretty_response)

    def create_model_async(self, name: str, module_path: str,
                           class_name: str, class_parameters: dict,
                           description: str = "",
                           pretty_response: bool = False) -> Union[dict, str]:
        """
        description: This method runs a model creation in async mode, thus it
        requires a wait method call.

        name: Is the name of the model that will be created.
        module_path: The name of the package of the ML tool used (e.g.
        Scikit-learn or TensorFlow).
        class_name: Is the name of the class to be executed.
        class_parameters: The set of parameters of the ML class defined
        previously.
        description: Is a textual description of the model.
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: A JSON object with an error or warning message or a URL
        indicating the future correct operation.
        """
        request_body = {
            self.__NAME_FIELD: name,
            self.__CLASS_FIELD: class_name,
            self.__MODULE_PATH_FIELD: module_path,
            self.__CLASS_PARAMETERS_FIELD: class_parameters,
            self.__DESCRIPTION_FIELD: description
        }
        request_url = self.__service_url
        response = requests.post(url=request_url, json=request_body)

        return self.__response_treat.treatment(response, pretty_response)

    def search_all_models(self, pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method retrieves all models metadata, i.e., it does
        not retrieve the model content.

        pretty_response: If true it returns a string, otherwise a dictionary.

        return: All models metadata stored in Learning Orchestra or an empty
        result.
        """
        response = self.__entity_reader.read_all_instances_from_entity()
        return self.__response_treat.treatment(response, pretty_response)

    def delete_model(self, name: str, pretty_response=False) \
            -> Union[dict, str]:
        """
        description: This method is responsible for deleting the model. The
        delete operation is asynchronous, so it does not lock the caller
        until the deletion finishes. Instead, it returns a JSON object with a
        URL for future use. The caller uses the URL for delete checks.

        name: Represents the model name.
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: JSON object with an error message, a warning message or a
        correct delete message.
        """
        request_url = f'{self.__service_url}/{name}'
        response = requests.delete(request_url)
        return self.__response_treat.treatment(response, pretty_response)

    def search_model(self, name: str, pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method retrieves a model metadata, i.e., it does
        not retrieve the model content.

        name: Is the model name.
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: A model metadata stored in Learning Orchestra or an empty
        result.
        """
        response = self.__entity_reader.read_entity_content(name)
        return self.__response_treat.treatment(response, pretty_response)

    def wait(self, name: str, timeout: int = None) -> dict:
        """
        description: This method creates a synchronization barrier for the
        create_model_async and delete_model methods.

        name: Represents the model name.
        timeout: Represents the time in seconds to wait for a model creation
        to finish its run.

        return: JSON object with an error message, a warning message or a
        correct model result.
        """
        return self.__observer.wait(name, timeout)
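# Minimal usage sketch for the generic Model pipe. The cluster address,
# api_path, model name and class parameters below are hypothetical
# placeholders; module_path and class_name follow the ML tool's own naming
# (a scikit-learn estimator is shown as an assumed example).
def _example_model_usage():
    model = Model(
        cluster_ip="http://cluster-ip",
        api_path="/api/learningOrchestra/v1/model/example")
    model.create_model_sync(
        name="titanic_model",
        module_path="sklearn.tree",
        class_name="DecisionTreeClassifier",
        class_parameters={"max_depth": 3},
        description="decision tree for the titanic dataset")
    print(model.search_model("titanic_model", pretty_response=True))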
class Dataset:
    __DATASET_NAME = "datasetName"
    __URL = "datasetURI"

    def __init__(self, cluster_ip: str, api_path: str):
        self.__service_url = f'{cluster_ip}{api_path}'
        self.__response_treat = ResponseTreat()
        self.__cluster_ip = cluster_ip
        self.__entity_reader = EntityReader(self.__service_url)
        self.__observer = Observer(self.__cluster_ip)

    def insert_dataset_sync(self, dataset_name: str, url: str,
                            pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method is responsible for inserting a dataset from
        a URI synchronously, i.e., the caller waits until the dataset is
        inserted into the Learning Orchestra storage mechanism.

        dataset_name: Is the name of the dataset file that will be created.
        url: URL of the CSV file.
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: A JSON object with an error or warning message or a URL
        indicating the correct operation.
        """
        request_body = {self.__DATASET_NAME: dataset_name, self.__URL: url}
        request_url = self.__service_url
        response = requests.post(url=request_url, json=request_body)
        self.__observer.wait(dataset_name)

        return self.__response_treat.treatment(response, pretty_response)

    def insert_dataset_async(self, dataset_name: str, url: str,
                             pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method is responsible for inserting a dataset from
        a URI asynchronously, i.e., the caller does not wait until the
        dataset is inserted into the Learning Orchestra storage mechanism.
        Instead, the caller receives a JSON object with a URL to proceed with
        future calls to verify if the dataset is inserted.

        dataset_name: Is the name of the dataset file that will be created.
        url: URL of the CSV file.
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: A JSON object with an error or warning message or a URL
        indicating the correct operation (the caller must use such a URL to
        proceed with future checks to verify if the dataset is inserted,
        using the wait method).
        """
        request_body = {self.__DATASET_NAME: dataset_name, self.__URL: url}
        request_url = self.__service_url
        response = requests.post(url=request_url, json=request_body)

        return self.__response_treat.treatment(response, pretty_response)

    def search_all_datasets(self, pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method retrieves all datasets metadata, i.e., it
        does not retrieve the dataset content.

        pretty_response: If true it returns a string, otherwise a dictionary.

        return: All datasets metadata stored in Learning Orchestra or an
        empty result.
        """
        response = self.__entity_reader.read_all_instances_from_entity()
        return self.__response_treat.treatment(response, pretty_response)

    def delete_dataset(self, dataset_name, pretty_response=False) \
            -> Union[dict, str]:
        """
        description: This method is responsible for deleting the dataset. The
        delete operation is asynchronous, so it does not lock the caller
        until the deletion finishes. Instead, it returns a JSON object with a
        URL for future use. The caller uses the URL for delete checks. If a
        dataset was used by another task (e.g. projection, histogram, pca,
        tune and so forth), it cannot be deleted.

        dataset_name: Represents the dataset name.
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: JSON object with an error message, a warning message or a
        correct delete message.
        """
        request_url = f'{self.__service_url}/{dataset_name}'
        response = requests.delete(request_url)
        return self.__response_treat.treatment(response, pretty_response)

    def search_dataset_content(self, dataset_name: str, query: dict = {},
                               limit: int = 10, skip: int = 0,
                               pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method is responsible for retrieving the dataset
        content.

        dataset_name: Is the name of the dataset file.
        query: Query to run in MongoDB (default: empty query).
        limit: Number of rows to return in pagination (default: 10; the
        maximum is 20 rows per request).
        skip: Number of rows to skip in pagination (default: 0).
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: A page with some tuples or registers inside or an error if
        there is no such dataset. The current page is also returned to be
        used in future content requests.
        """
        response = self.__entity_reader.read_entity_content(
            dataset_name, query, limit, skip)
        return self.__response_treat.treatment(response, pretty_response)

    def wait(self, dataset_name: str, timeout: int = None) -> dict:
        """
        description: This method creates a synchronization barrier for the
        insert_dataset_async method.

        dataset_name: Represents the dataset name.
        timeout: Represents the time in seconds to wait for a dataset
        download to finish its run.

        return: JSON object with an error message, a warning message or a
        correct dataset insertion result.
        """
        return self.__observer.wait(dataset_name, timeout)
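# Minimal usage sketch for the Dataset pipe. The cluster address, api_path
# and CSV URL below are hypothetical placeholders.
def _example_dataset_usage():
    dataset = Dataset(
        cluster_ip="http://cluster-ip",
        api_path="/api/learningOrchestra/v1/dataset/example")
    # Async insert plus wait() is equivalent to the sync variant.
    dataset.insert_dataset_async(
        dataset_name="titanic",
        url="https://example.com/titanic.csv")
    dataset.wait("titanic")
    # Paginated content read: first 10 rows.
    print(dataset.search_dataset_content("titanic", limit=10, skip=0))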
class FunctionPython:
    __CODE_FIELD = "function"
    __PARAMETERS_FIELD = "functionParameters"
    __NAME_FIELD = "name"
    __DESCRIPTION_FIELD = "description"

    def __init__(self, cluster_ip: str):
        self.__api_path = "/api/learningOrchestra/v1/function/python"
        self.__service_url = f'{cluster_ip}{self.__api_path}'
        self.__response_treat = ResponseTreat()
        self.__cluster_ip = cluster_ip
        self.__entity_reader = EntityReader(self.__service_url)
        self.__observer = Observer(self.__cluster_ip)

    def run_function_sync(self, name: str, parameters: dict, code: str,
                          description: str = "",
                          pretty_response: bool = False) -> Union[dict, str]:
        """
        description: This method runs a Python 3 code in sync mode, so it
        represents a wildcard for the data scientist. It can be used when
        train, predict, tune, explore or any other pipe must be customized.
        The function is also useful for new pipes.

        name: Is the name of the object stored in the Learning Orchestra
        storage system (volume or mongoDB).
        parameters: The parameters of the function being called.
        code: The Python 3 code to be executed.
        description: Is a textual description of the function execution.
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: A JSON object with an error or warning message or the correct
        operation result.
        """
        request_body = {
            self.__NAME_FIELD: name,
            self.__PARAMETERS_FIELD: parameters,
            self.__CODE_FIELD: code,
            self.__DESCRIPTION_FIELD: description
        }
        request_url = self.__service_url
        response = requests.post(url=request_url, json=request_body)
        self.__observer.wait(name)

        return self.__response_treat.treatment(response, pretty_response)

    def run_function_async(self, name: str, parameters: dict, code: str,
                           description: str = "",
                           pretty_response: bool = False) -> Union[dict, str]:
        """
        description: This method runs a Python 3 code in async mode, so it
        represents a wildcard for the data scientist. It does not lock the
        caller, so a wait method call must be used. It can be used when
        train, predict, tune, explore or any other pipe must be customized.
        The function is also useful for new pipes.

        name: Is the name of the function to be called.
        parameters: The parameters of the function being called.
        code: The Python 3 code to be executed.
        description: Is a textual description of the function execution.
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: A JSON object with an error or warning message or the correct
        operation result.
        """
        request_body = {
            self.__NAME_FIELD: name,
            self.__PARAMETERS_FIELD: parameters,
            self.__CODE_FIELD: code,
            self.__DESCRIPTION_FIELD: description
        }
        request_url = self.__service_url
        response = requests.post(url=request_url, json=request_body)

        return self.__response_treat.treatment(response, pretty_response)

    def search_all_executions(self, pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method retrieves all created functions metadata,
        i.e., it does not retrieve the function result content.

        pretty_response: If true it returns a string, otherwise a dictionary.

        return: All function executions metadata stored in Learning Orchestra
        or an empty result.
        """
        response = self.__entity_reader.read_all_instances_from_entity()
        return self.__response_treat.treatment(response, pretty_response)

    def delete_execution(self, name: str, pretty_response=False) \
            -> Union[dict, str]:
        """
        description: This method is responsible for deleting the function
        execution. The delete operation is asynchronous, so it does not lock
        the caller until the deletion finishes. Instead, it returns a JSON
        object with a URL for future use. The caller uses the URL for delete
        checks.

        name: Represents the function name.
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: JSON object with an error message, a warning message or a
        correct delete message.
        """
        request_url = f'{self.__service_url}/{name}'
        response = requests.delete(request_url)
        return self.__response_treat.treatment(response, pretty_response)

    def search_execution_content(self, name: str, query: dict = {},
                                 limit: int = 10, skip: int = 0,
                                 pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method is responsible for retrieving the function
        results, including metadata. A function is executed many times, using
        different parameters, thus many results are stored in Learning
        Orchestra.

        name: Is the name of the function.
        query: Query to run in MongoDB (default: empty query).
        limit: Number of rows to return in pagination (default: 10; the
        maximum is 20 rows per request).
        skip: Number of rows to skip in pagination (default: 0).
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: A page with some function results inside or an error if there
        is no such function. The current page is also returned to be used in
        future content requests.
        """
        response = self.__entity_reader.read_entity_content(
            name, query, limit, skip)
        return self.__response_treat.treatment(response, pretty_response)

    def wait(self, dataset_name: str, timeout: int = None) -> dict:
        """
        description: This method creates a synchronization barrier for the
        run_function_async and delete_execution methods.

        dataset_name: Represents the function name.
        timeout: Represents the time in seconds to wait for a function to
        finish its run.

        return: JSON object with an error message, a warning message or a
        correct function result.
        """
        return self.__observer.wait(dataset_name, timeout)
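# Minimal usage sketch for FunctionPython. The cluster address, execution
# name, parameters and code string below are hypothetical placeholders; the
# code is sent to the cluster as plain text and executed there, and the
# convention used inside the code string is an assumption.
def _example_function_python_usage():
    function = FunctionPython(cluster_ip="http://cluster-ip")
    function.run_function_async(
        name="sum_example",
        parameters={"a": 1, "b": 2},
        code="response = a + b",
        description="adds the two parameters on the cluster")
    function.wait("sum_example")
    print(function.search_execution_content("sum_example",
                                             pretty_response=True))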
class ExploreHistogram:
    __INPUT_NAME = "inputDatasetName"
    __OUTPUT_NAME = "outputDatasetName"
    __FIELDS = "names"

    def __init__(self, cluster_ip: str):
        self.__cluster_ip = cluster_ip
        self.__api_path = "/api/learningOrchestra/v1/explore/histogram"
        self.__service_url = f'{cluster_ip}{self.__api_path}'
        self.__response_treat = ResponseTreat()
        self.__observer = Observer(self.__cluster_ip)
        self.__entity_reader = EntityReader(self.__service_url)

    def run_histogram_sync(self, dataset_name: str, histogram_name: str,
                           fields: list, pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method creates a histogram synchronously, so the
        caller waits until the histogram is inserted into the Learning
        Orchestra storage mechanism.

        dataset_name: Represents the name of the dataset.
        histogram_name: Represents the name of the histogram.
        fields: Represents a list of attributes.
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: A JSON object with error or warning messages. In case of
        success, it returns a histogram.
        """
        request_body = {
            self.__INPUT_NAME: dataset_name,
            self.__OUTPUT_NAME: histogram_name,
            self.__FIELDS: fields,
        }
        request_url = self.__service_url
        response = requests.post(url=request_url, json=request_body)
        self.__observer.wait(dataset_name)

        return self.__response_treat.treatment(response, pretty_response)

    def run_histogram_async(self, dataset_name: str, histogram_name: str,
                            fields: list, pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method creates a histogram asynchronously, so the
        caller does not wait until the histogram is inserted into the
        Learning Orchestra storage mechanism. A wait method call must occur
        to guarantee a synchronization barrier.

        dataset_name: Represents the name of the dataset.
        histogram_name: Represents the name of the histogram.
        fields: Represents a list of attributes.
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: A JSON object with error or warning messages. In case of
        success, it returns the histogram URL to be obtained later with a
        wait method call.
        """
        request_body = {
            self.__INPUT_NAME: dataset_name,
            self.__OUTPUT_NAME: histogram_name,
            self.__FIELDS: fields,
        }
        request_url = self.__service_url
        response = requests.post(url=request_url, json=request_body)

        return self.__response_treat.treatment(response, pretty_response)

    def search_all_histograms(self, pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method retrieves all histogram metadata, it does
        not retrieve the histogram content.

        pretty_response: If true it returns a string, otherwise a dictionary.

        return: A list with all histogram metadata stored in Learning
        Orchestra or an empty result.
        """
        response = self.__entity_reader.read_all_instances_from_entity()
        return self.__response_treat.treatment(response, pretty_response)

    def search_histogram_content(self, histogram_name: str, query: dict = {},
                                 limit: int = 10, skip: int = 0,
                                 pretty_response: bool = False) \
            -> Union[dict, str]:
        """
        description: This method is responsible for retrieving the histogram
        content.

        histogram_name: Represents the histogram name.
        query: Query to run in MongoDB (default: empty query).
        limit: Number of rows to return in pagination (default: 10; the
        maximum is 20 rows per request).
        skip: Number of rows to skip in pagination (default: 0).
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: A page with some tuples or registers inside or an error if
        there is no such histogram. The current page is also returned to be
        used in future content requests.
        """
        response = self.__entity_reader.read_entity_content(
            histogram_name, query, limit, skip)
        return self.__response_treat.treatment(response, pretty_response)

    def delete_histogram(self, histogram_name: str,
                         pretty_response: bool = False) -> Union[dict, str]:
        """
        description: This method is responsible for deleting a histogram. The
        delete operation is always asynchronous, since the deletion is
        performed in the background.

        histogram_name: Represents the histogram name.
        pretty_response: If true it returns a string, otherwise a dictionary.

        return: JSON object with an error message, a warning message or a
        correct delete message.
        """
        cluster_url_histogram = f'{self.__service_url}/{histogram_name}'
        response = requests.delete(cluster_url_histogram)
        return self.__response_treat.treatment(response, pretty_response)

    def wait(self, name: str, timeout: int = None) -> dict:
        """
        description: This method creates a synchronization barrier for the
        run_histogram_async and delete_histogram methods.

        name: Represents the histogram name.
        timeout: Represents the time in seconds to wait for a histogram to
        finish its run.

        return: JSON object with an error message, a warning message or a
        correct histogram result.
        """
        return self.__observer.wait(name, timeout)