class TensorMachinesBinaryClassification(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]):
    """
    Learns a polynomial function using logistic regression for binary classification by modeling
    the polynomial's coefficients as low-rank tensors. Meant as a faster, more scalable alternative
    to polynomial random feature map approaches like CRAFTMaps.
    """

    __author__ = "ICSI"  # a la directions on https://gitlab.datadrivendiscovery.org/jpl/primitives_repo

    metadata = metadata_module.PrimitiveMetadata({
        'id': 'ecc83605-d340-490d-9a2d-81c2ea6cb6cb',  # uuid3(NAMESPACE_DNS, "realML.kernel.TensorMachinesBinaryClassification" + __version__),
        'version': __version__,
        'name': 'Tensor Machine Binary Classifier',
        'description': 'Fit a polynomial function for logistic regression by modeling the polynomial coefficients as a collection of low-rank tensors',
        'python_path': 'd3m.primitives.realML.kernel.TensorMachinesBinaryClassification',
        'primitive_family': metadata_module.PrimitiveFamily.CLASSIFICATION,
        'algorithm_types': [
            metadata_module.PrimitiveAlgorithmType.LOGISTIC_REGRESSION,
        ],
        'keywords': ['kernel learning', 'binary classification', 'adaptive features', 'polynomial model', 'classification'],
        'source': {
            'name': __author__,
            'contact': 'mailto:[email protected]',
            'citation': 'https://arxiv.org/abs/1504.01697',
            'uris': [
                "http://*****:*****",  # repository URI redacted in the source
            ],
        },
        'installation': [{
            'type': metadata_module.PrimitiveInstallationType.PIP,
            'package_uri': 'git+http://*****:*****@{git_commit}#egg=realML'.format(git_commit=utils.current_git_commit(os.path.dirname(__file__))),
        }],
        'location_uris': [  # NEED TO REF SPECIFIC COMMIT
            'https://github.com/alexgittens/realML/blob/master/realML/kernel/TensorMachinesBinaryClassification.py',
        ],
        'preconditions': [
            metadata_module.PrimitivePrecondition.NO_MISSING_VALUES,
            metadata_module.PrimitivePrecondition.NO_CATEGORICAL_VALUES,
        ],
    })

    def __init__(self, *, hyperparams: Hyperparams, random_seed: int = 0, docker_containers: Dict[str, str] = None) -> None:
        super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers)
        self._seed = random_seed
        self._training_inputs = None
        self._training_outputs = None
        self._fitted = False
        self._weights = None
        self._norms = None

    def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None:
        self._training_inputs = inputs
        self._training_outputs = outputs
        if self.hyperparams['preprocess'] == 'YES':
            (self._training_inputs, self._norms) = tm_preprocess(self._training_inputs)
        self._fitted = False

    def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
        if self._fitted:
            return CallResult(None)

        if self._training_inputs is None or self._training_outputs is None:
            raise ValueError("Missing training data.")

        if len(self._training_outputs.shape) == 1:
            self._training_outputs = np.expand_dims(self._training_outputs, axis=1)

        (self._weights, _) = tm_fit(self._training_inputs, self._training_outputs, 'bc',
                                    self.hyperparams['r'], self.hyperparams['q'],
                                    self.hyperparams['gamma'], self.hyperparams['solver'],
                                    self.hyperparams['epochs'], self.hyperparams['alpha'],
                                    seed=self._seed)
        self._fitted = True

        return CallResult(None)

    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
        if self.hyperparams['preprocess'] == 'YES':
            inputs = tm_preprocess(inputs, colnorms=self._norms)
        pred_test = tm_predict(self._weights, inputs, self.hyperparams['q'], self.hyperparams['r'], 'bc')
        return CallResult(sign(pred_test.flatten()).astype(int))

    def get_params(self) -> Params:
        return Params(weights=self._weights, norms=self._norms)

    def set_params(self, *, params: Params) -> None:
        self._weights = params['weights']
        self._norms = params['norms']
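# Usage sketch (illustrative, not part of the original module): the standard D3M
# set_training_data / fit / produce flow. Assumes "Hyperparams.defaults()" supplies
# values for 'q', 'r', 'gamma', 'solver', 'epochs', 'alpha', and 'preprocess'; labels
# are in {-1, +1} since produce() returns sign() of the polynomial's output.
#
#   import numpy as np
#   primitive = TensorMachinesBinaryClassification(hyperparams=Hyperparams.defaults())
#   primitive.set_training_data(inputs=np.random.randn(100, 5),
#                               outputs=np.sign(np.random.randn(100)))
#   primitive.fit()
#   labels = primitive.produce(inputs=np.random.randn(10, 5)).value  # array of -1/+1 ints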
import os

from d3m_metadata import utils

D3M_API_VERSION = '2018.1.26'
VERSION = "0.1.0"
TAG_NAME = "{git_commit}".format(git_commit=utils.current_git_commit(os.path.dirname(__file__)))

REPOSITORY = "https://github.com/rooshenas/dsbox-spen"
PACKAGE_NAME = "dsbox-spen"

D3M_PERFORMER_TEAM = 'UMASS'

if TAG_NAME:
    PACKAGE_URI = "git+" + REPOSITORY + "@" + TAG_NAME
else:
    PACKAGE_URI = "git+" + REPOSITORY

PACKAGE_URI = PACKAGE_URI + "#egg=" + PACKAGE_NAME

INSTALLATION_TYPE = 'GIT'
if INSTALLATION_TYPE == 'PYPI':
    INSTALLATION = {
        "type": "PIP",
        "package": PACKAGE_NAME,
        "version": VERSION,
    }
else:  # INSTALLATION_TYPE == 'GIT'
    INSTALLATION = {
        "type": "PIP",
        "package_uri": PACKAGE_URI,
    }
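# For reference (illustrative only; "0123abc" is a placeholder, not a real commit hash):
#   INSTALLATION_TYPE == 'GIT'  -> {"type": "PIP",
#       "package_uri": "git+https://github.com/rooshenas/dsbox-spen@0123abc#egg=dsbox-spen"}
#   INSTALLATION_TYPE == 'PYPI' -> {"type": "PIP", "package": "dsbox-spen", "version": "0.1.0"}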
class MonomialPrimitive(supervised_learning.SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]):
    # It is important to provide a docstring because this docstring is used as a description of
    # a primitive. Some callers might analyze it to determine the nature and purpose of a primitive.
    """
    A primitive which fits output = a * input.
    """

    # This should contain only metadata which cannot be automatically determined from the code.
    metadata = metadata_module.PrimitiveMetadata({
        # Simply an UUID generated once and fixed forever. Generated using "uuid.uuid4()".
        'id': '4a0336ae-63b9-4a42-860e-86c5b64afbdd',
        'version': "crap",
        'name': "Monomial Regressor",
        # Keywords do not have a controlled vocabulary. Authors can put here whatever they find suitable.
        'keywords': ['test primitive'],
        'source': {
            'name': "boss",
            'uris': [
                # Unstructured URIs. Link to file and link to repo in this case.
                'https://gitlab.com/datadrivendiscovery/tests-data/blob/master/primitives/test_primitives/monomial.py',
                'https://gitlab.com/datadrivendiscovery/tests-data.git',
            ],
        },
        # A list of dependencies in order. These can be Python packages, system packages, or Docker images.
        # Of course Python packages can also have their own dependencies, but sometimes it is necessary to
        # install a Python package first to be even able to run setup.py of another package. Or you have
        # a dependency which is not on PyPI.
        'installation': [{
            'type': metadata_module.PrimitiveInstallationType.PIP,
            'package_uri': 'git+https://gitlab.com/datadrivendiscovery/tests-data.git@{git_commit}#egg=test_primitives&subdirectory=primitives'.format(
                git_commit=utils.current_git_commit(os.path.dirname(__file__)),
            ),
        }],
        # URIs at which one can obtain code for the primitive, if available.
        'location_uris': [
            'https://gitlab.com/datadrivendiscovery/tests-data/raw/{git_commit}/primitives/test_primitives/monomial.py'.format(
                git_commit=utils.current_git_commit(os.path.dirname(__file__)),
            ),
        ],
        # The same path the primitive is registered with entry points in setup.py.
        'python_path': 'd3m.primitives.test.MonomialPrimitive',
        # Choose these from a controlled vocabulary in the schema. If anything is missing which would
        # best describe the primitive, make a merge request.
        'algorithm_types': [
            metadata_module.PrimitiveAlgorithmType.LINEAR_REGRESSION,
        ],
        'primitive_family': metadata_module.PrimitiveFamily.REGRESSION,
    })

    def __init__(self, *, hyperparams: Hyperparams, random_seed: int = 0,
                 docker_containers: typing.Dict[str, str] = None) -> None:
        super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers)

        self._a: float = None
        self._training_inputs: Inputs = None
        self._training_outputs: Outputs = None
        self._fitted: bool = False

    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
        if self._a is None:
            raise ValueError("Calling produce before fitting.")

        # We compute the result. We use (...) here and not [...] to create a
        # generator and not a list which would then just be copied into "List".
        result = (self._a * input + self.hyperparams['bias'] for input in inputs)

        # We convert a regular list to container list which supports metadata attribute.
        outputs: container.List[float] = container.List[float](result)

        # We clear old metadata (but which keeps history and link to inputs metadata) and set new metadata.
        # "for_value" tells that this new metadata will be associated with "outputs",
        # and "source" tells which primitive generated this metadata.
        metadata = inputs.metadata.clear({
            'schema': metadata_module.CONTAINER_SCHEMA_VERSION,
            'structural_type': type(outputs),
            'dimension': {
                'length': len(outputs),
            },
        }, for_value=outputs, source=self).update((metadata_module.ALL_ELEMENTS,), {
            'structural_type': float,
        }, source=self)

        # Set metadata attribute.
        outputs.metadata = metadata

        # Wrap it into default "CallResult" object: we are not doing any iterations.
        return base.CallResult(outputs)

    def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None:
        self._training_inputs = inputs
        self._training_outputs = outputs
        self._fitted = False

    def fit(self, *, timeout: float = None, iterations: int = None) -> base.CallResult[None]:
        if self._fitted:
            return base.CallResult(None)

        if not self._training_inputs or not self._training_outputs:
            raise ValueError("Missing training data.")

        quotients = [output / input for output, input in zip(self._training_outputs, self._training_inputs) if input != 0]
        self._a = sum(quotients) / len(quotients)
        self._fitted = True

        return base.CallResult(None)

    def get_params(self) -> Params:
        # You can pass a dict or keyword arguments.
        return Params(a=self._a)

    def set_params(self, *, params: Params) -> None:
        # Params are just a fancy dict.
        self._a = params['a']
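# Usage sketch (illustrative, not part of the original test module): fitting
# output = a * input on a tiny list. Assumes the 'bias' hyper-parameter defaults to 0.
#
#   primitive = MonomialPrimitive(hyperparams=Hyperparams.defaults())
#   primitive.set_training_data(inputs=container.List[float]([1.0, 2.0, 3.0]),
#                               outputs=container.List[float]([2.0, 4.0, 6.0]))
#   primitive.fit()  # a = mean(2/1, 4/2, 6/3) = 2.0
#   primitive.produce(inputs=container.List[float]([4.0])).value  # -> [8.0]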
class TensorMachinesRegularizedLeastSquares(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]):
    """
    Fits an l2-regularized least squares polynomial regression model by modeling the coefficients
    of the polynomial with a low-rank tensor. Intended as a scalable alternative to polynomial
    random feature maps like CRAFTMaps.
    """

    __author__ = "ICSI"  # a la directions on https://gitlab.datadrivendiscovery.org/jpl/primitives_repo

    metadata = metadata_module.PrimitiveMetadata({
        'id': '2d8155bb-3ca8-39de-8964-adb21225868e',
        'version': __version__,
        'name': 'Tensor Machine Regularized Least Squares',
        'description': 'Fit a polynomial function for l2-regularized regression by modeling the polynomial coefficients as a collection of low-rank tensors',
        'python_path': 'd3m.primitives.realML.kernel.TensorMachinesRegularizedLeastSquares',
        'primitive_family': metadata_module.PrimitiveFamily.REGRESSION,
        'algorithm_types': [
            metadata_module.PrimitiveAlgorithmType.KERNEL_METHOD,
            metadata_module.PrimitiveAlgorithmType.POLYNOMIAL_NEURAL_NETWORK,
        ],
        'keywords': ['kernel learning', 'polynomial regression', 'adaptive features', 'polynomial model', 'regression'],
        'source': {
            'name': __author__,
            'contact': 'mailto:[email protected]',
            'citation': 'https://arxiv.org/abs/1504.01697',
            'uris': [
                "http://*****:*****",  # repository URI redacted in the source
            ],
        },
        'installation': [{
            'type': metadata_module.PrimitiveInstallationType.PIP,
            'package_uri': 'git+http://*****:*****@{git_commit}#egg=realML'.format(git_commit=utils.current_git_commit(os.path.dirname(__file__))),
        }],
        'location_uris': [  # NEED TO REF SPECIFIC COMMIT
            'https://github.com/alexgittens/realML/blob/master/realML/kernel/TensorMachinesRegularizedLeastSquares.py',
        ],
        'preconditions': [
            metadata_module.PrimitivePrecondition.NO_MISSING_VALUES,
            metadata_module.PrimitivePrecondition.NO_CATEGORICAL_VALUES
        ],
    })

    def __init__(self, *, hyperparams: Hyperparams, random_seed: int = 0, docker_containers: Dict[str, str] = None) -> None:
        super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers)
        self._seed = random_seed
        self._training_inputs = None
        self._training_outputs = None
        self._fitted = False
        self._weights = None
        self._norms = None

    def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None:
        self._training_inputs = inputs
        self._training_outputs = outputs
        if self.hyperparams['preprocess'] == 'YES':
            (self._training_inputs, self._norms) = tm_preprocess(self._training_inputs)
        self._fitted = False

    def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
        if self._fitted:
            return CallResult(None)

        if self._training_inputs is None or self._training_outputs is None:
            raise ValueError("Missing training data.")

        (self._weights, _) = tm_fit(self._training_inputs, self._training_outputs, 'regression',
                                    self.hyperparams['r'], self.hyperparams['q'],
                                    self.hyperparams['gamma'], self.hyperparams['solver'],
                                    self.hyperparams['epochs'], self.hyperparams['alpha'],
                                    seed=self._seed)
        self._fitted = True

        return CallResult(None)

    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
        if self.hyperparams['preprocess'] == 'YES':
            inputs = tm_preprocess(inputs, colnorms=self._norms)
        pred_test = tm_predict(self._weights, inputs, self.hyperparams['q'], self.hyperparams['r'], 'regression')
        return CallResult(pred_test.flatten())

    def get_params(self) -> Params:
        return Params(weights=self._weights, norms=self._norms)

    def set_params(self, *, params: Params) -> None:
        self._weights = params['weights']
        self._norms = params['norms']
class BBNSVC(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]):
    """
    Primitive wrapping sklearn.svm.SVC.
    """

    __author__ = "JPL MARVIN"

    metadata = metadata_module.PrimitiveMetadata({
        "algorithm_types": ['SUPPORT_VECTOR_MACHINE'],
        "name": "sklearn.svm.classes.SVC",
        "primitive_family": "CLASSIFICATION",
        "python_path": "d3m.primitives.bbn.time_series.BBNSVC",
        "source": {'name': 'JPL'},
        "version": "0.1.0",
        "id": "a2ee7b2b-99c6-4326-b2e7-e081cd292d78",
        'installation': [{
            'type': metadata_module.PrimitiveInstallationType.PIP,
            'package_uri': 'git+https://gitlab.datadrivendiscovery.org/jpl/d3m_sklearn_wrap.git@{git_commit}'.format(
                git_commit=utils.current_git_commit(os.path.dirname(__file__)),
            ),
        }],
    })

    def __init__(self, *, hyperparams: Hyperparams, random_seed: int = 0,
                 docker_containers: Dict[str, str] = None, _verbose: int = 0) -> None:
        super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers)

        self._clf = SVC(
            C=self.hyperparams['C'],
            kernel=self.hyperparams['kernel'],
            degree=self.hyperparams['degree'],
            gamma=self.hyperparams['gamma'],
            coef0=self.hyperparams['coef0'],
            probability=self.hyperparams['probability'],
            shrinking=self.hyperparams['shrinking'],
            tol=self.hyperparams['tol'],
            class_weight=self.hyperparams['class_weight'],
            max_iter=self.hyperparams['max_iter'],
            decision_function_shape=self.hyperparams['decision_function_shape'],
            verbose=_verbose,
            random_state=self.random_seed,
        )
        self._training_inputs = None
        self._training_outputs = None
        self._fitted = False

    def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None:
        self._training_inputs = inputs
        self._training_outputs = outputs
        self._fitted = False

    def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
        if self._fitted:
            return CallResult(None)

        if self._training_inputs is None or self._training_outputs is None:
            raise ValueError("Missing training data.")

        self._clf.fit(self._training_inputs, self._training_outputs)
        self._fitted = True

        return CallResult(None)

    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
        return CallResult(self._clf.predict(inputs))

    def produce_log_proba(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
        # Requires the 'probability' hyper-parameter to be True.
        return CallResult(self._clf.predict_log_proba(inputs))

    def get_params(self) -> Params:
        return Params(
            support=self._clf.support_,
            support_vectors=self._clf.support_vectors_,
            n_support=self._clf.n_support_,
            dual_coef=self._clf.dual_coef_,
            coef=self._clf.coef_,
            intercept=self._clf.intercept_,
        )

    def set_params(self, *, params: Params) -> None:
        self._clf.support_ = params['support']
        self._clf.support_vectors_ = params['support_vectors']
        self._clf.n_support_ = params['n_support']
        self._clf.dual_coef_ = params['dual_coef']
        # Note: coef_ is not restored; in sklearn's SVC it is a property derived from dual_coef_.
        self._clf.intercept_ = params['intercept']
"class_attributes": { "metadata": "d3m_metadata.metadata.PrimitiveMetadata" }, "instance_attributes": { "hyperparams": "d3m_metadata.hyperparams.Hyperparams", "random_seed": "int", "docker_containers": "typing.Dict[str, str]" } }, "structural_type": "test_primitives.increment.IncrementPrimitive", "description": "A primitive which increments each value by a fixed amount, by default 1." } """.replace('__INTERFACES_VERSION__', primitive_interfaces.__version__).replace( '__GIT_COMMIT__', utils.current_git_commit(TEST_PRIMITIVES_DIR)) class TestIncrementPrimitive(unittest.TestCase): # It is not necessary to call "can_accept" before calling a method # during runtime, but we are doing it here for testing purposes. def call_primitive(self, primitive, method_name, **kwargs): primitive_arguments = primitive.metadata.query( )['primitive_code']['arguments'] arguments = {} for argument_name, argument_value in kwargs.items(): if primitive_arguments[argument_name][ 'kind'] == metadata.PrimitiveArgumentKind.PIPELINE: arguments[argument_name] = argument_value.metadata
class JHUGraph(ClusteringPrimitiveBase[Inputs, Outputs, Params, Hyperparams]):

    # TODO: Create metadata for this
    # This should contain only metadata which cannot be automatically determined from the code.
    metadata = metadata_module.PrimitiveMetadata({
        # Simply an UUID generated once and fixed forever. Generated using "uuid.uuid4()".
        'id': 'b940ccbd-9e9b-3166-af50-210bfd79251b',
        'version': "crap",
        'name': "Monomial Regressor",
        # Keywords do not have a controlled vocabulary. Authors can put here whatever they find suitable.
        'keywords': ['test primitive'],
        'source': {
            'name': "boss",
            'uris': [
                # Unstructured URIs. Link to file and link to repo in this case.
                'https://gitlab.com/datadrivendiscovery/tests-data/blob/master/primitives/test_primitives/monomial.py',
                'https://gitlab.com/datadrivendiscovery/tests-data.git',
            ],
        },
        # A list of dependencies in order. These can be Python packages, system packages, or Docker images.
        # Of course Python packages can also have their own dependencies, but sometimes it is necessary to
        # install a Python package first to be even able to run setup.py of another package. Or you have
        # a dependency which is not on PyPI.
        'installation': [{
            'type': metadata_module.PrimitiveInstallationType.PIP,
            'package_uri': 'git+https://gitlab.com/datadrivendiscovery/tests-data.git@{git_commit}#egg=test_primitives&subdirectory=primitives'.format(
                git_commit=utils.current_git_commit(os.path.dirname(__file__)),
            ),
        }],
        # URIs at which one can obtain code for the primitive, if available.
        'location_uris': [
            'https://gitlab.com/datadrivendiscovery/tests-data/raw/{git_commit}/primitives/test_primitives/monomial.py'.format(
                git_commit=utils.current_git_commit(os.path.dirname(__file__)),
            ),
        ],
        # The same path the primitive is registered with entry points in setup.py.
        'python_path': 'd3m.primitives.test.MonomialPrimitive',
        # Choose these from a controlled vocabulary in the schema. If anything is missing which would
        # best describe the primitive, make a merge request.
        'algorithm_types': [
            metadata_module.PrimitiveAlgorithmType.LINEAR_REGRESSION,
        ],
        'primitive_family': metadata_module.PrimitiveFamily.REGRESSION,
    })

    _adjacency_matrix = None
    _num_vertices = None
    _num_edges = None
    _directed = None
    _weighted = None
    _dangling_nodes = None

    def read_graph(self, *, fname: str) -> None:
        dtype = self.hyperparams['dtype']

        if dtype == "gml":
            self._object = read_graph(fname, "gml")
        elif dtype.startswith("edge"):
            self._object = read_graph(fname, "edge")
        else:
            raise NotImplementedError("Reading graphs of type '{}'".format(dtype))

        self._num_vertices = ig_get_num_vertices(self._object)
        self._num_edges = ig_get_num_edges(self._object)
        self._directed = ig_is_directed(self._object)
        self._weighted = ig_is_weighted(self._object)

    def compute_statistics(self) -> None:
        self._dangling_nodes = ig_get_dangling_nodes(self._object)

    def get_adjacency_matrix(self) -> Outputs:
        return ig_get_adjacency_matrix(self._object)

    def get_dense_matrix(self) -> Outputs:
        return ig_get_dense_matrix(self._object)

    def get_num_vertices(self) -> int:
        return self._num_vertices

    def get_num_edges(self) -> int:
        return self._num_edges

    def is_directed(self) -> bool:
        return self._directed

    def is_weighted(self) -> bool:
        return self._weighted

    def get_dangling_nodes(self) -> Outputs:
        if self._dangling_nodes is None:
            self.compute_statistics()
        return self._dangling_nodes

    def summary(self) -> None:
        ig_summary(self._object)

    def set_training_data(self, *, inputs: Inputs) -> None:  # type: ignore
        pass

    def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
        return base.CallResult(self.get_adjacency_matrix())

    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
        return base.CallResult(None)

    def get_params(self) -> Params:
        return Params(other={})

    def set_params(self, *, params: Params) -> None:
        return None
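# Usage sketch (illustrative, not part of the original module): reading a graph from a
# GML file and querying basic statistics. The 'dtype' hyper-parameter selects the on-disk
# format ("gml" or "edge*"); 'example.gml' is a placeholder path.
#
#   g = JHUGraph(hyperparams=Hyperparams.defaults())
#   g.read_graph(fname='example.gml')
#   g.get_num_vertices(), g.get_num_edges(), g.is_directed(), g.is_weighted()
#   g.get_dangling_nodes()  # computed lazily via compute_statistics()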
"description": "A noop." } }, "class_attributes": { "metadata": "d3m_metadata.metadata.PrimitiveMetadata" }, "instance_attributes": { "hyperparams": "d3m_metadata.hyperparams.Hyperparams", "random_seed": "int", "docker_containers": "typing.Dict[str, str]" } }, "structural_type": "test_primitives.sum.SumPrimitive", "description": "A primitive which sums all the values on input into one number." } """.replace('__INTERFACES_VERSION__', primitive_interfaces.__version__).replace('__GIT_COMMIT__', utils.current_git_commit(TEST_PRIMITIVES_DIR)) class TestSumPrimitive(unittest.TestCase): @classmethod def setUpClass(cls): cls.docker_client = docker.from_env() cls.docker_containers = {} # Start all containers (this pulls images if they do not yet exist). installation = SumPrimitive.metadata.query().get('installation', []) for entry in installation: if entry['type'] != metadata.PrimitiveInstallationType.DOCKER: continue
class duke(PrimitiveBase[Inputs, Outputs, Params, Hyperparams]):
    metadata = metadata_module.PrimitiveMetadata({
        # Simply an UUID generated once and fixed forever. Generated using "uuid.uuid4()".
        'id': "46612a42-6120-3559-9db9-3aa9a76eb94f",
        'version': __version__,
        'name': "duke",
        # Keywords do not have a controlled vocabulary. Authors can put here whatever they find suitable.
        'keywords': ['Dataset Descriptor'],
        'source': {
            'name': __author__,
            'uris': [
                # Unstructured URIs.
                "https://github.com/NewKnowledge/duke-thin-client",
            ],
        },
        # A list of dependencies in order. These can be Python packages, system packages, or Docker images.
        # Of course Python packages can also have their own dependencies, but sometimes it is necessary to
        # install a Python package first to be even able to run setup.py of another package. Or you have
        # a dependency which is not on PyPI.
        'installation': [{
            'type': metadata_module.PrimitiveInstallationType.PIP,
            'package_uri': 'git+https://github.com/NewKnowledge/duke-thin-client.git@{git_commit}#egg=DukeThinClient'.format(
                git_commit=utils.current_git_commit(os.path.dirname(__file__)),
            ),
        }],
        # The same path the primitive is registered with entry points in setup.py.
        'python_path': 'd3m.primitives.distil.duke',
        # Choose these from a controlled vocabulary in the schema. If anything is missing which would
        # best describe the primitive, make a merge request.
        'algorithm_types': [
            metadata_module.PrimitiveAlgorithmType.RECURRENT_NEURAL_NETWORK,
        ],
        'primitive_family': metadata_module.PrimitiveFamily.DATA_CLEANING,
    })

    def __init__(self, *, hyperparams: Hyperparams, random_seed: int = 0,
                 docker_containers: typing.Dict[str, str] = None) -> None:
        super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers)

        self._decoder = JSONDecoder()
        self._params = {}

    def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
        return CallResult(None)

    def get_params(self) -> Params:
        return self._params

    def set_params(self, *, params: Params) -> None:
        self._params = params

    def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None:
        pass

    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
        """
        Accept a pandas data frame and produce the primitive's best guess for the type of
        each of its columns.

        Parameters
        ----------
        inputs : Inputs
            inputs[0] is the URL of the processing service and inputs[1] is the path of the
            file containing the data frame to be processed.

        Returns
        -------
        Outputs
            A list with one entry per column of the input frame; each entry is a list of
            strings giving that column's multi-label classification.
        """
        filename = inputs[1]
        files = {'file': open(filename, 'rb')}

        try:
            r = requests.post(inputs[0] + "/fileUpload", files=files)
            return CallResult(self._decoder.decode(r.text))
        except Exception:
            # Should probably do some more sophisticated error logging here
            return CallResult("Failed processing input file")
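# Usage sketch (illustrative, not part of the original module): produce() posts the file at
# inputs[1] to the service at inputs[0]. Both values below are placeholders.
#
#   d = duke(hyperparams=Hyperparams.defaults())
#   labels = d.produce(inputs=["http://localhost:9000", "dataset.csv"]).value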
class RFMPreconditionedGaussianKRR(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]):
    """
    Performs Gaussian kernel regression using a random feature map to precondition the
    problem for faster convergence: forms the kernel

        K_{ij} = exp(-||x_i - x_j||^2 / (2 sigma^2))

    and solves

        alphahat = argmin ||K alpha - y||_F^2 + lambda ||alpha||_F^2

    Predictions are then formed by

        ypred = K(trainingData, x) alphahat
    """

    __author__ = "ICSI"  # a la directions on https://gitlab.datadrivendiscovery.org/jpl/primitives_repo

    metadata = metadata_module.PrimitiveMetadata({
        'id': '511c3536-2fff-369a-b81d-96755ff5b81b',
        'version': __version__,
        'name': 'RFM Preconditioned Gaussian Kernel Ridge Regression',
        'description': 'Gaussian regression using random fourier features as a preconditioner for faster solves',
        'python_path': 'd3m.primitives.realML.kernel.RFMPreconditionedGaussianKRR',
        'primitive_family': metadata_module.PrimitiveFamily.REGRESSION,
        'algorithm_types': [
            metadata_module.PrimitiveAlgorithmType.KERNEL_METHOD,
        ],
        'keywords': ['kernel learning', 'kernel ridge regression', 'preconditioned CG', 'Gaussian', 'RBF', 'regression'],
        'source': {
            'name': __author__,
            'contact': 'mailto:[email protected]',
            'uris': [
                "http://*****:*****",  # repository URI redacted in the source
            ],
        },
        'installation': [{
            'type': metadata_module.PrimitiveInstallationType.PIP,
            'package_uri': 'git+http://*****:*****@{git_commit}#egg=realML'.format(git_commit=utils.current_git_commit(os.path.dirname(__file__))),
        }],
        'location_uris': [  # NEED TO REF SPECIFIC COMMIT
            'https://github.com/alexgittens/realML/blob/master/realML/kernel/RFMPreconditionedGaussianKRR.py',
        ],
        'preconditions': [
            metadata_module.PrimitivePrecondition.NO_MISSING_VALUES,
            metadata_module.PrimitivePrecondition.NO_CATEGORICAL_VALUES
        ],
    })

    def __init__(self, *, hyperparams: Hyperparams, random_seed: int = 0, docker_containers: Dict[str, str] = None) -> None:
        """
        Initializes the preconditioned Gaussian kernel ridge regression primitive.
        """
        super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers)
        self._seed = random_seed
        self._Xtrain = None
        self._ytrain = None
        self._fitted = False
        np.random.seed(random_seed)

    def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None:
        """
        Sets the training data:
            Input: array, shape = [n_samples, n_features]
            Output: array, shape = [n_samples, n_targets]
        Only uses one input and output.
        """
        self._Xtrain = inputs
        self._ytrain = outputs

        maxPCGsize = 20000  # TODO: make a control hyperparameter for when to switch to GS

        if len(self._ytrain.shape) == 1:
            self._ytrain = np.expand_dims(self._ytrain, axis=1)

        if self._Xtrain.shape[0] > maxPCGsize:
            print("need to implement Gauss-Seidel for large datasets; currently training with a smaller subset")
            choices = np.random.choice(self._Xtrain.shape[0], size=maxPCGsize, replace=False)
            self._Xtrain = self._Xtrain[choices, :]
            self._ytrain = self._ytrain[choices, :]

        self._n, self._d = self._Xtrain.shape
        self._fitted = False

    def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
        """
        Learns the kernel regression coefficients alpha given training pairs (X, y).
        """
        if self._fitted:
            return CallResult(None)

        if self._Xtrain is None or self._ytrain is None:
            raise ValueError("Missing training data.")

        self._U = generateGaussianPreconditioner(self._Xtrain, self.hyperparams['sigma'], self.hyperparams['lparam'])

        def mykernel(X, Y):
            return GaussianKernel(X, Y, self.hyperparams['sigma'])

        self._coeffs = PCGfit(self._Xtrain, self._ytrain, mykernel, self._U,
                              self.hyperparams['lparam'], self.hyperparams['eps'],
                              self.hyperparams['maxIters'])
        self._fitted = True

        return CallResult(None)

    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
        """
        Predict the value for each sample in X:
            Inputs: X, array of shape [n_samples, n_features]
            Outputs: y, array of shape [n_samples, n_targets]
        """
        return CallResult(GaussianKernel(inputs, self._Xtrain, self.hyperparams['sigma']).dot(self._coeffs).flatten())

    def set_params(self, *, params: Params) -> None:
        self._Xtrain = params['exemplars']
        self._coeffs = params['coeffs']

    def get_params(self) -> Params:
        return Params(exemplars=self._Xtrain, coeffs=self._coeffs)
class AdjacencySpectralEmbedding(TransformerPrimitiveBase[Inputs, Outputs, Hyperparams]):

    # This should contain only metadata which cannot be automatically determined from the code.
    metadata = metadata_module.PrimitiveMetadata({
        # Simply an UUID generated once and fixed forever. Generated using "uuid.uuid4()".
        'id': 'b940ccbd-9e9b-3166-af50-210bfd79251b',
        'version': "0.3.0",
        'name': "jhu.ase",
        # The same path the primitive is registered with entry points in setup.py.
        'python_path': 'd3m.primitives.jhu_primitives.AdjacencySpectralEmbedding',
        # Keywords do not have a controlled vocabulary. Authors can put here whatever they find suitable.
        'keywords': ['ase primitive'],
        'source': {
            'name': "JHU",
            'uris': [
                # Unstructured URIs. Link to file and link to repo in this case.
                'https://github.com/youngser/D3M/primitives-interfaces/jhu_primitives/ase/ase.py',
                # 'https://github.com/youngser/primitives-interfaces/blob/jp-devM1/jhu_primitives/ase/ase.py',
                'https://github.com/youngser/D3M/primitives-interfaces.git',
            ],
        },
        # A list of dependencies in order. These can be Python packages, system packages, or Docker images.
        # Of course Python packages can also have their own dependencies, but sometimes it is necessary to
        # install a Python package first to be even able to run setup.py of another package. Or you have
        # a dependency which is not on PyPI.
        'installation': [{
            'type': metadata_module.PrimitiveInstallationType.PIP,
            'package_uri': 'git+https://github.com/youngser/D3M/primitives-interfaces.git@{git_commit}#egg=jhu.ase'.format(
                git_commit=utils.current_git_commit(os.path.dirname(__file__)),
            ),
        }],
        # URIs at which one can obtain code for the primitive, if available.
        # 'location_uris': [
        #     'https://gitlab.com/datadrivendiscovery/tests-data/raw/{git_commit}/primitives/test_primitives/monomial.py'.format(
        #         git_commit=utils.current_git_commit(os.path.dirname(__file__)),
        #     ),
        # ],
        # Choose these from a controlled vocabulary in the schema. If anything is missing which would
        # best describe the primitive, make a merge request.
        'algorithm_types': [
            "HIGHER_ORDER_SINGULAR_VALUE_DECOMPOSITION",
        ],
        'primitive_family': "DATA_TRANSFORMATION",
    })

    def __init__(self, *, hyperparams: Hyperparams, random_seed: int = 0, docker_containers: Dict[str, str] = None) -> None:
        super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers)

    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
        # def embed(self, *, g: JHUGraph, dim: int):
        """
        Perform Adjacency Spectral Embedding on a graph.

        TODO: YP description

        **Positional Arguments:**

        g:
            - Graph in JHUGraph format

        **Optional Arguments:**

        dim:
            - The number of dimensions in which to embed the data
        """
        dim = self.hyperparams['dim']

        path = os.path.join(os.path.abspath(os.path.dirname(__file__)), "ase.interface.R")
        cmd = """
        source("%s")
        fn <- function(inputs, dim) {
            ase.interface(inputs, dim)
        }
        """ % path
        print(cmd)

        result = np.array(robjects.r(cmd)(inputs, dim))
        outputs = container.ndarray(result)

        return base.CallResult(outputs)
        # return np.array(robjects.r(cmd)(g._object, dim))

    def set_training_data(self) -> None:  # type: ignore
        """
        A noop.
        """
        return

    def fit(self, *, timeout: float = None, iterations: int = None) -> None:
        """
        A noop.
        """
        return

    def get_params(self) -> None:
        """
        A noop.
        """
        return None

    def set_params(self, *, params: None) -> None:
        """
        A noop.
        """
        return