def outlierBySd(X: OperationNode, max_iterations: int, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> Matrix:
    """
    Create a `Matrix` node for the 'outlierBySd' operation.

    :param X: Operand that must pass `_check_matrix_op()`.
    :param max_iterations: Forwarded as the named input 'max_iterations'.
    :param kwargs: Additional named inputs, merged into the named-input mapping.
    :return: `Matrix` created on `X.sds_context`.
    """
    X._check_matrix_op()
    named_inputs = {'X': X, 'max_iterations': max_iterations, **kwargs}
    return Matrix(X.sds_context, 'outlierBySd', named_input_nodes=named_inputs)
def vectorToCsv(mask: OperationNode) -> Matrix:
    """
    Create a `Matrix` node for the 'vectorToCsv' operation.

    :param mask: Operand that must pass `_check_matrix_op()`; sent as named input 'mask'.
    :return: `Matrix` created on `mask.sds_context`.
    """
    mask._check_matrix_op()
    named_inputs = dict(mask=mask)
    return Matrix(mask.sds_context, 'vectorToCsv', named_input_nodes=named_inputs)
def discoverFD(X: OperationNode, Mask: OperationNode, threshold: float) -> Matrix:
    """
    Create a `Matrix` node for the 'discoverFD' operation.

    :param X: Operand that must pass `_check_matrix_op()`.
    :param Mask: Operand that must pass `_check_matrix_op()`.
    :param threshold: Forwarded as the named input 'threshold'.
    :return: `Matrix` created on `X.sds_context`.
    """
    # Both operands are checked (X first) before the node is built.
    for operand in (X, Mask):
        operand._check_matrix_op()
    named_inputs = dict(X=X, Mask=Mask, threshold=threshold)
    return Matrix(X.sds_context, 'discoverFD', named_input_nodes=named_inputs)
def kmeans(x: OperationNode, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> OperationNode:
    """
    Performs KMeans on matrix input.

    :param x: Input dataset to perform K-Means on.
    :param k: The number of centroids to use for the algorithm.
    :param runs: The number of concurrent instances of K-Means to run (with different initial centroids).
    :param max_iter: The maximum number of iterations to run the K-Means algorithm for.
    :param eps: Tolerance for the algorithm to declare convergence using WCSS change ratio.
    :param is_verbose: Boolean flag if the algorithm should be run in a verbose manner.
    :param avg_sample_size_per_centroid: The average number of records per centroid in the data samples.
    :return: `OperationNode` List containing two outputs 1. the clusters, 2 the cluster ID associated with each row in x.
    :raises ValueError: If `x.shape[0]` is 0, or if `k` is supplied and is below 1.
    """
    x._check_matrix_op()

    if x.shape[0] == 0:
        empty_msg = "Found array with 0 feature(s) (shape={s}) while a minimum of 1 is required."
        raise ValueError(empty_msg.format(s=x.shape))

    if 'k' in kwargs and kwargs['k'] < 1:
        raise ValueError(
            "Invalid number of clusters in K-Means, number must be integer above 0")

    # The dataset is always sent under the upper-case key 'X'.
    named_inputs = {'X': x, **kwargs}
    return OperationNode(
        x.sds_context,
        'kmeans',
        named_input_nodes=named_inputs,
        output_type=OutputType.LIST,
        number_of_outputs=2,
    )
def pca(x: OperationNode, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> OperationNode:
    """
    Performs PCA on the matrix input

    :param x: Input dataset to perform Principal Component Analysis (PCA) on.
    :param K: The number of reduced dimensions.
    :param center: Boolean specifying if the input values should be centered.
    :param scale: Boolean specifying if the input values should be scaled.
    :return: `OperationNode` List containing two outputs 1. The dimensionality reduced X input,
        2. A matrix to reduce dimensionality similarly on unseen data.
    :raises ValueError: If `x.shape[0]` is 0, or if `K` is supplied and is below 1.
    """
    x._check_matrix_op()
    if x.shape[0] == 0:
        raise ValueError("Found array with 0 feature(s) (shape={s}) while a minimum of 1 is required."
                         .format(s=x.shape))

    if 'K' in kwargs and kwargs['K'] < 1:
        raise ValueError(
            "Invalid number of dimensions in PCA, number must be integer above 0")

    # Python booleans are forwarded as the string literals "TRUE"/"FALSE".
    # The earlier code called dict.set(), which does not exist, and looked up
    # the key `False` instead of comparing the flag, so neither branch worked.
    for flag in ('scale', 'center'):
        value = kwargs.get(flag)
        if isinstance(value, bool):
            kwargs[flag] = "TRUE" if value else "FALSE"

    params_dict = {'X': x}
    params_dict.update(kwargs)
    return OperationNode(x.sds_context, 'pca', named_input_nodes=params_dict,
                         output_type=OutputType.LIST, number_of_outputs=2)
def read(self, path: os.PathLike, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> 'OperationNode':
    """
    Read a file from disk. Supported types include:
    CSV, Matrix Market(coordinate), Text(i,j,v), SystemDS Binary
    See: http://apache.github.io/systemds/site/dml-language-reference#readwrite-built-in-functions for more details

    The `data_type` keyword selects the kind of node that is built:
    "frame" builds a `Frame`, "scalar" builds an `OperationNode` with
    `OutputType.SCALAR`, anything else builds a plain `OperationNode`.
    Selected string arguments are wrapped in double quotes before being
    forwarded.

    :param path: Location of the file; forwarded wrapped in double quotes.
    :param kwargs: Named arguments forwarded to the read operation.
    :return: an Operation Node, containing the read data.
    """
    quoted_path = f'"{path}"'
    data_type = kwargs.get("data_type", None)
    file_format = kwargs.get("format", None)

    if data_type == "frame":
        kwargs["data_type"] = f'"{data_type}"'
        # Only string formats are quoted; other values are passed through untouched.
        if isinstance(file_format, str):
            kwargs["format"] = f'"{file_format}"'
        return Frame(self, None, quoted_path, **kwargs)

    if data_type == "scalar":
        kwargs["data_type"] = f'"{data_type}"'
        value_type = kwargs.get("value_type", None)
        if value_type == "string":
            kwargs["value_type"] = f'"{value_type}"'
        return OperationNode(
            self,
            "read",
            [quoted_path],
            named_input_nodes=kwargs,
            shape=(-1, ),
            output_type=OutputType.SCALAR,
        )

    return OperationNode(self, "read", [quoted_path], named_input_nodes=kwargs, shape=(-1, ))
def bandit(X_train: OperationNode, Y_train: OperationNode, metaList: Iterable, targetList: Iterable,
           lp: OperationNode, primitives: OperationNode, param: OperationNode,
           **kwargs: Dict[str, VALID_INPUT_TYPES]) -> OperationNode:
    """
    Create a multi-output `OperationNode` for the 'bandit' operation.

    The return annotation used to read `Matrix`, but the function has always
    returned a LIST-typed `OperationNode` with four outputs; the annotation
    now matches what is actually returned.

    :param X_train: Operand that must pass `_check_matrix_op()`.
    :param Y_train: Operand that must pass `_check_matrix_op()`.
    :param metaList: Forwarded as the named input 'metaList'.
    :param targetList: Forwarded as the named input 'targetList'.
    :param lp: Forwarded as the named input 'lp'.
    :param primitives: Forwarded as the named input 'primitives'.
    :param param: Forwarded as the named input 'param'.
    :param kwargs: Additional named inputs, merged into the named-input mapping.
    :return: `OperationNode` with `OutputType.LIST` holding four outputs typed
        FRAME, MATRIX, MATRIX, FRAME (in that order).
    """
    X_train._check_matrix_op()
    Y_train._check_matrix_op()
    params_dict = {
        'X_train': X_train,
        'Y_train': Y_train,
        'metaList': metaList,
        'targetList': targetList,
        'lp': lp,
        'primitives': primitives,
        'param': param,
    }
    params_dict.update(kwargs)
    return OperationNode(
        X_train.sds_context,
        'bandit',
        named_input_nodes=params_dict,
        output_type=OutputType.LIST,
        number_of_outputs=4,
        output_types=[
            OutputType.FRAME,
            OutputType.MATRIX,
            OutputType.MATRIX,
            OutputType.FRAME,
        ],
    )
def alsDS(X: OperationNode, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> OperationNode:
    """
    Create a two-output `OperationNode` for the 'alsDS' operation.

    :param X: Operand that must pass `_check_matrix_op()`; sent as named input 'X'.
    :param rank: Rank of the factorization
    :param lambda: Regularization parameter, no regularization if 0.0
    :param maxi: Maximum number of iterations
    :param check: Check for convergence after every iteration, i.e., updating L and R once
    :param thr: Assuming check is set to TRUE, the algorithm stops and convergence is declared
        if the decrease in loss in any two consecutive iterations falls below this threshold;
        if check is FALSE thr is ignored
    :return: `OperationNode` with `OutputType.LIST` holding two MATRIX outputs
        (presumably the factor matrices L and R -- confirm against the alsDS script).
    """
    X._check_matrix_op()
    named_inputs = {'X': X, **kwargs}
    return OperationNode(
        X.sds_context,
        'alsDS',
        named_input_nodes=named_inputs,
        output_type=OutputType.LIST,
        number_of_outputs=2,
        output_types=[OutputType.MATRIX, OutputType.MATRIX],
    )
def components(G: OperationNode, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> Matrix:
    """
    Create a `Matrix` node for the 'components' operation.

    :param G: Operand that must pass `_check_matrix_op()`; sent as named input 'G'.
    :param kwargs: Additional named inputs, merged into the named-input mapping.
    :return: `Matrix` created on `G.sds_context`.
    """
    G._check_matrix_op()
    named_inputs = {'G': G, **kwargs}
    return Matrix(G.sds_context, 'components', named_input_nodes=named_inputs)
def outlier(X: OperationNode, opposite: bool) -> OperationNode:
    """
    Create a MATRIX-typed `OperationNode` for the 'outlier' operation.

    :param X: Operand that must pass `_check_matrix_op()`.
    :param opposite: Forwarded as the named input 'opposite'.
    :return: `OperationNode` created on `X.sds_context` with `OutputType.MATRIX`.
    """
    X._check_matrix_op()
    named_inputs = dict(X=X, opposite=opposite)
    return OperationNode(
        X.sds_context,
        'outlier',
        named_input_nodes=named_inputs,
        output_type=OutputType.MATRIX,
    )
def imputeByMedian(X: OperationNode, mask: OperationNode) -> Matrix:
    """
    Create a `Matrix` node for the 'imputeByMedian' operation.

    :param X: Operand that must pass `_check_matrix_op()`.
    :param mask: Operand that must pass `_check_matrix_op()`.
    :return: `Matrix` created on `X.sds_context`.
    """
    # Check X first, then mask, before building the node.
    for operand in (X, mask):
        operand._check_matrix_op()
    named_inputs = dict(X=X, mask=mask)
    return Matrix(X.sds_context, 'imputeByMedian', named_input_nodes=named_inputs)
def vectorToCsv(vector: OperationNode) -> OperationNode:
    """
    Create a STRING-typed `OperationNode` for the 'vectorToCsv' operation.

    :param vector: Operand that must pass `_check_matrix_op()`; sent as named input 'vector'.
    :return: `OperationNode` created on `vector.sds_context` with `OutputType.STRING`.
    """
    vector._check_matrix_op()
    named_inputs = dict(vector=vector)
    return OperationNode(
        vector.sds_context,
        'vectorToCsv',
        named_input_nodes=named_inputs,
        output_type=OutputType.STRING,
    )
def getAccuracy(y: OperationNode, yhat: OperationNode, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> Matrix:
    """
    Create a `Matrix` node for the 'getAccuracy' operation.

    :param y: Operand that must pass `_check_matrix_op()`.
    :param yhat: Operand that must pass `_check_matrix_op()`.
    :param kwargs: Additional named inputs, merged into the named-input mapping.
    :return: `Matrix` created on `y.sds_context`.
    """
    y._check_matrix_op()
    yhat._check_matrix_op()
    named_inputs = {'y': y, 'yhat': yhat, **kwargs}
    return Matrix(y.sds_context, 'getAccuracy', named_input_nodes=named_inputs)
def vectorToCsv(mask: OperationNode) -> OperationNode:
    """
    Create a STRING-typed `OperationNode` for the 'vectorToCsv' operation.

    :param mask: Operand that must pass `_check_matrix_op()`; sent as named input 'mask'.
    :return: `OperationNode` created on `mask.sds_context` with `OutputType.STRING`.
    """
    mask._check_matrix_op()
    named_inputs = dict(mask=mask)
    return OperationNode(
        mask.sds_context,
        'vectorToCsv',
        named_input_nodes=named_inputs,
        output_type=OutputType.STRING,
    )
def lmCG(X: OperationNode, y: OperationNode, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> Matrix:
    """
    Create a `Matrix` node for the 'lmCG' operation.

    :param X: Operand that must pass `_check_matrix_op()`.
    :param y: Operand that must pass `_check_matrix_op()`.
    :param kwargs: Additional named inputs, merged into the named-input mapping.
    :return: `Matrix` created on `X.sds_context`.
    """
    X._check_matrix_op()
    y._check_matrix_op()
    named_inputs = {'X': X, 'y': y, **kwargs}
    return Matrix(X.sds_context, 'lmCG', named_input_nodes=named_inputs)
def img_mirror(img_in: OperationNode, horizontal_axis: bool) -> OperationNode:
    """
    Create a MATRIX-typed `OperationNode` for the 'img_mirror' operation.

    :param img_in: Operand that must pass `_check_matrix_op()`.
    :param horizontal_axis: Forwarded as the named input 'horizontal_axis'.
    :return: `OperationNode` created on `img_in.sds_context` with `OutputType.MATRIX`.
    """
    img_in._check_matrix_op()
    named_inputs = dict(img_in=img_in, horizontal_axis=horizontal_axis)
    return OperationNode(
        img_in.sds_context,
        'img_mirror',
        named_input_nodes=named_inputs,
        output_type=OutputType.MATRIX,
    )
def msvm(X: OperationNode, Y: OperationNode, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> Matrix:
    """
    Create a `Matrix` node for the 'msvm' operation.

    :param X: Operand that must pass `_check_matrix_op()`.
    :param Y: Operand that must pass `_check_matrix_op()`.
    :param kwargs: Additional named inputs, merged into the named-input mapping.
    :return: `Matrix` created on `X.sds_context`.
    """
    X._check_matrix_op()
    Y._check_matrix_op()
    named_inputs = {'X': X, 'Y': Y, **kwargs}
    return Matrix(X.sds_context, 'msvm', named_input_nodes=named_inputs)
def smote(X: OperationNode, mask: OperationNode, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> Matrix:
    """
    Create a `Matrix` node for the 'smote' operation.

    :param X: Operand that must pass `_check_matrix_op()`.
    :param mask: Operand that must pass `_check_matrix_op()`.
    :param kwargs: Additional named inputs, merged into the named-input mapping.
    :return: `Matrix` created on `X.sds_context`.
    """
    X._check_matrix_op()
    mask._check_matrix_op()
    named_inputs = {'X': X, 'mask': mask, **kwargs}
    return Matrix(X.sds_context, 'smote', named_input_nodes=named_inputs)
def xdummy1(X: OperationNode) -> OperationNode:
    """
    Create a MATRIX-typed `OperationNode` for the 'xdummy1' operation.

    :param X: Operand that must pass `_check_matrix_op()`; sent as named input 'X'.
    :return: `OperationNode` created on `X.sds_context` with `OutputType.MATRIX`.
    """
    X._check_matrix_op()
    named_inputs = dict(X=X)
    return OperationNode(
        X.sds_context,
        'xdummy1',
        named_input_nodes=named_inputs,
        output_type=OutputType.MATRIX,
    )
def outlierByArima(X: OperationNode, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> Matrix:
    """
    Create a `Matrix` node for the 'outlierByArima' operation.

    :param X: Operand that must pass `_check_matrix_op()`; sent as named input 'X'.
    :param kwargs: Additional named inputs, merged into the named-input mapping.
    :return: `Matrix` created on `X.sds_context`.
    """
    X._check_matrix_op()
    named_inputs = {'X': X, **kwargs}
    return Matrix(X.sds_context, 'outlierByArima', named_input_nodes=named_inputs)
def intersect(X: OperationNode, Y: OperationNode) -> OperationNode:
    """
    Create a MATRIX-typed `OperationNode` for the 'intersect' operation.

    :param X: Operand that must pass `_check_matrix_op()`.
    :param Y: Operand that must pass `_check_matrix_op()`.
    :return: `OperationNode` created on `X.sds_context` with `OutputType.MATRIX`.
    """
    # Check X first, then Y, before building the node.
    for operand in (X, Y):
        operand._check_matrix_op()
    named_inputs = dict(X=X, Y=Y)
    return OperationNode(
        X.sds_context,
        'intersect',
        named_input_nodes=named_inputs,
        output_type=OutputType.MATRIX,
    )
def dist(X: OperationNode) -> Matrix:
    """
    Create a `Matrix` node for the 'dist' operation.

    :param X: Operand that must pass `_check_matrix_op()`; sent as named input 'X'.
    :return: `Matrix` created on `X.sds_context`.
    """
    X._check_matrix_op()
    named_inputs = dict(X=X)
    return Matrix(X.sds_context, 'dist', named_input_nodes=named_inputs)
def img_brightness(img_in: OperationNode, value: float, channel_max: int) -> Matrix:
    """
    Create a `Matrix` node for the 'img_brightness' operation.

    :param img_in: Operand that must pass `_check_matrix_op()`.
    :param value: Forwarded as the named input 'value'.
    :param channel_max: Forwarded as the named input 'channel_max'.
    :return: `Matrix` created on `img_in.sds_context`.
    """
    img_in._check_matrix_op()
    named_inputs = dict(img_in=img_in, value=value, channel_max=channel_max)
    return Matrix(img_in.sds_context, 'img_brightness', named_input_nodes=named_inputs)
def knnbf(X: OperationNode, T: OperationNode, k_value: int) -> OperationNode:
    """
    Create a MATRIX-typed `OperationNode` for the 'knnbf' operation.

    :param X: Operand that must pass `_check_matrix_op()`.
    :param T: Operand that must pass `_check_matrix_op()`.
    :param k_value: Forwarded as the named input 'k_value'.
    :return: `OperationNode` created on `X.sds_context` with `OutputType.MATRIX`.
    """
    # Check X first, then T, before building the node.
    for operand in (X, T):
        operand._check_matrix_op()
    named_inputs = dict(X=X, T=T, k_value=k_value)
    return OperationNode(
        X.sds_context,
        'knnbf',
        named_input_nodes=named_inputs,
        output_type=OutputType.MATRIX,
    )
def winsorize(X: OperationNode, verbose: bool) -> Matrix:
    """
    Create a `Matrix` node for the 'winsorize' operation.

    :param X: Operand that must pass `_check_matrix_op()`.
    :param verbose: Forwarded as the named input 'verbose'.
    :return: `Matrix` created on `X.sds_context`.
    """
    X._check_matrix_op()
    named_inputs = dict(X=X, verbose=verbose)
    return Matrix(X.sds_context, 'winsorize', named_input_nodes=named_inputs)
def winsorize(X: OperationNode, verbose: bool) -> OperationNode:
    """
    Create a MATRIX-typed `OperationNode` for the 'winsorize' operation.

    :param X: Operand that must pass `_check_matrix_op()`.
    :param verbose: Forwarded as the named input 'verbose'.
    :return: `OperationNode` created on `X.sds_context` with `OutputType.MATRIX`.
    """
    X._check_matrix_op()
    named_inputs = dict(X=X, verbose=verbose)
    return OperationNode(
        X.sds_context,
        'winsorize',
        named_input_nodes=named_inputs,
        output_type=OutputType.MATRIX,
    )
def img_crop(img_in: OperationNode, w: int, h: int, x_offset: int, y_offset: int) -> OperationNode:
    """
    Create a MATRIX-typed `OperationNode` for the 'img_crop' operation.

    :param img_in: Operand that must pass `_check_matrix_op()`.
    :param w: Forwarded as the named input 'w'.
    :param h: Forwarded as the named input 'h'.
    :param x_offset: Forwarded as the named input 'x_offset'.
    :param y_offset: Forwarded as the named input 'y_offset'.
    :return: `OperationNode` created on `img_in.sds_context` with `OutputType.MATRIX`.
    """
    img_in._check_matrix_op()
    named_inputs = dict(
        img_in=img_in,
        w=w,
        h=h,
        x_offset=x_offset,
        y_offset=y_offset,
    )
    return OperationNode(
        img_in.sds_context,
        'img_crop',
        named_input_nodes=named_inputs,
        output_type=OutputType.MATRIX,
    )
def imputeByMean(X: OperationNode, mask: OperationNode) -> OperationNode:
    """
    Create a MATRIX-typed `OperationNode` for the 'imputeByMean' operation.

    :param X: Operand that must pass `_check_matrix_op()`.
    :param mask: Operand that must pass `_check_matrix_op()`.
    :return: `OperationNode` created on `X.sds_context` with `OutputType.MATRIX`.
    """
    # Check X first, then mask, before building the node.
    for operand in (X, mask):
        operand._check_matrix_op()
    named_inputs = dict(X=X, mask=mask)
    return OperationNode(
        X.sds_context,
        'imputeByMean',
        named_input_nodes=named_inputs,
        output_type=OutputType.MATRIX,
    )
def img_mirror(img_in: OperationNode, horizontal_axis: bool) -> Matrix:
    """
    Create a `Matrix` node for the 'img_mirror' operation.

    :param img_in: Operand that must pass `_check_matrix_op()`.
    :param horizontal_axis: Forwarded as the named input 'horizontal_axis'.
    :return: `Matrix` created on `img_in.sds_context`.
    """
    img_in._check_matrix_op()
    named_inputs = dict(img_in=img_in, horizontal_axis=horizontal_axis)
    return Matrix(img_in.sds_context, 'img_mirror', named_input_nodes=named_inputs)
def components(G: OperationNode, **kwargs: Dict[str, VALID_INPUT_TYPES]) -> OperationNode:
    """
    Create a MATRIX-typed `OperationNode` for the 'components' operation.

    :param G: Operand that must pass `_check_matrix_op()`; sent as named input 'G'.
    :param kwargs: Additional named inputs, merged into the named-input mapping.
    :return: `OperationNode` created on `G.sds_context` with `OutputType.MATRIX`.
    """
    G._check_matrix_op()
    named_inputs = {'G': G, **kwargs}
    return OperationNode(
        G.sds_context,
        'components',
        named_input_nodes=named_inputs,
        output_type=OutputType.MATRIX,
    )