def test_sklearn_tensor(tmpdir):
    """Exercise the scikit-learn style interface on a three-way tensor.

    A `DECOMPOSE` estimator is fitted to synthetic low rank training data
    with `fit_transform`; the learned factors have to explain between 95%
    and 100% of the training variance. Unseen test data is then passed to
    `transform`, and the test data has to be recovered from the
    transformed representation with the same variance-explained bound.
    """
    # the model and its checkpoints are written to a temporary directory
    model_dir = str(tmpdir.mkdir("model"))

    # synthetic low rank dataset (same shape for training and test data)
    rank = 3
    train_shape = [500, 100, 50]
    test_shape = [500, 100, 50]
    lrData = LowRank(rank=rank, M_train=train_shape, M_test=test_shape)

    # one prior per tensor dimension
    factor_priors = [CenNormal(), CenNormal(), CenNormal()]
    model = DECOMPOSE(
        model_dir,
        priors=factor_priors,
        n_components=rank,
        dtype=np.float32,
    )

    # fit the model; the remaining two factors are exposed as components
    U0 = model.fit_transform(lrData.training)
    U1, U2 = model.components_
    train_var_expl = lrData.var_expl_training((U0, U1, U2))
    assert 0.95 <= train_var_expl <= 1.

    # transform the test data, using a separate directory for that model
    transform_dir = str(tmpdir.mkdir("transformModel"))
    U0test = model.transform(
        transformModelDirectory=transform_dir,
        X=lrData.test,
    )
    test_var_expl = lrData.var_expl_test((U0test, U1, U2))
    assert 0.95 <= test_var_expl <= 1.
def init(self, data: Tensor) -> None:
    """Create the initial noise distribution from the configured tau.

    A `CenNormal` distribution is built whose `tau` is a one-element
    constant holding the stored initial value, using the stored dtype and
    properties. The result is kept as the noise distribution of this
    object.

    Arguments:
        data: Not used by this implementation.
    """
    initial_tau = self.__tauInit
    tau_dtype = self.__dtype
    dist_properties = self.__properties

    # a single tau value, wrapped in a constant tensor of length one
    tau_tensor = tf.constant([initial_tau], dtype=tau_dtype)
    self.__noiseDistribution = CenNormal(tau=tau_tensor,
                                         properties=dist_properties)
def test_sklearn_cv(tmpdir):
    """Exercise cross-validation on a partially observed three-way tensor.

    A `DECOMPOSE` estimator is created with `isFullyObserved=False` and a
    `Block` cross-validation scheme. Roughly 20% of the synthetic low rank
    training data is replaced by NaN before `fit_transform` is applied.
    The reconstruction built from the learned factors has to explain
    between 95% and 100% of the variance, both on the entries selected by
    the model's test mask and on the complete training data.
    """
    # the model and its checkpoints are written to a temporary directory
    model_dir = str(tmpdir.mkdir("model"))

    # synthetic low rank dataset
    rank = 3
    lrData = LowRank(rank=rank, M_train=[30, 100, 150],
                     M_test=[200, 100, 150])

    # one prior per tensor dimension, block-wise cross-validation
    factor_priors = [CenNormal(), CenNormal(), CenNormal()]
    model = DECOMPOSE(
        model_dir,
        priors=factor_priors,
        n_components=rank,
        isFullyObserved=False,
        cv=Block(nFolds=(2, 3, 3), foldNumber=3),
        dtype=np.float32,
    )

    # mark about 20% of the elements as unobserved (uniform draw above 0.8)
    partially_observed = lrData.training.copy()
    unobserved = np.random.random(partially_observed.shape) > 0.8
    partially_observed[unobserved] = np.nan

    # train on the data containing the NaN entries
    U0 = model.fit_transform(partially_observed)

    # mask marking the test set, and the remaining two factors
    testMask = model.testMask
    U1, U2 = model.components_
    held_out = testMask.flatten()

    # variance explained on the held-out entries, measured against the
    # complete (NaN free) training data
    reconstruction = np.einsum("ka,kb,kc->abc", U0, U1, U2)
    held_out_residuals = (reconstruction - lrData.training).flatten()[held_out]
    held_out_truth = lrData.training.flatten()[held_out]
    testVarExpl = 1. - np.var(held_out_residuals) / np.var(held_out_truth)
    assert 0.95 <= testVarExpl <= 1.

    # variance explained on the complete training data
    train_var_expl = lrData.var_expl_training((U0, U1, U2))
    assert 0.95 <= train_var_expl <= 1.
def __init__(self, modelDirectory: str,
             priors: Tuple[Distribution, ...] = (CenNormal(), CenNormal()),
             n_components: int = 3,
             isFullyObserved: bool = True,
             dtype: type = np.float32,
             maxIterations: int = 100000,
             cv: CV = None,
             noiseUniformity: NoiseUniformity = HOMOGENEOUS,
             stopCriterionInit: StopCriterion = LlhStall(100),
             stopCriterionEM: StopCriterion = LlhStall(100),
             stopCriterionBCD: StopCriterion = LlhImprovementThreshold(.1),
             device: str = "/cpu:0") -> None:
    """Store the hyperparameters and create the underlying estimator.

    Every argument is kept on the instance; all of them except
    `maxIterations` are also forwarded to
    `TensorFactorisation.getEstimator`.

    Arguments:
        modelDirectory: Forwarded to the estimator as `path`.
        priors: Prior distributions, forwarded as `priors`.
        n_components: Stored on the instance; the estimator receives the
            value of `self.n_components` as `K`.
        isFullyObserved: Forwarded as `isFullyObserved`.
        dtype: Converted with `tf.as_dtype` before it is forwarded.
        maxIterations: Stored on the instance only.
        cv: Cross-validation object, forwarded as `cv`; may be `None`.
        noiseUniformity: Forwarded as `noiseUniformity`.
        stopCriterionInit: Forwarded as `stopCriterionInit`.
        stopCriterionEM: Forwarded as `stopCriterionEM`.
        stopCriterionBCD: Forwarded as `stopCriterionBCD`.
        device: Device string, forwarded as `device`.
    """
    # NOTE(review): the default prior and stop criterion objects in the
    # signature are created once, when the function is defined, and are
    # therefore shared by every instance relying on the defaults.

    # what is decomposed and how many components are used
    self.__modelDirectory = modelDirectory
    self.__priors = priors
    self.__n_components = n_components
    self.__isFullyObserved = isFullyObserved
    self.__dtype = dtype
    self.__cv = cv
    self.__noiseUniformity = noiseUniformity

    # how the training is run and when it stops
    self.__maxIterations = maxIterations
    self.__device = device
    self.__stopCriterionInit = stopCriterionInit
    self.__stopCriterionEM = stopCriterionEM
    self.__stopCriterionBCD = stopCriterionBCD

    # collect the estimator options first, then build the estimator
    estimator_options = dict(
        priors=priors,
        K=self.n_components,
        isFullyObserved=isFullyObserved,
        dtype=tf.as_dtype(dtype),
        path=modelDirectory,
        noiseUniformity=noiseUniformity,
        cv=cv,
        stopCriterionInit=stopCriterionInit,
        stopCriterionEM=stopCriterionEM,
        stopCriterionBCD=stopCriterionBCD,
        device=self.__device,
    )
    self.__tefa = TensorFactorisation.getEstimator(**estimator_options)
def test_sklearn_cv(tmpdir):
    """Exercise cross-validation on a two-way (matrix) dataset.

    A `DECOMPOSE` estimator with a `Block` cross-validation scheme is
    fitted to synthetic low rank training data with `fit_transform`. The
    variance explained on the entries selected by the model's test mask
    is computed and printed. The variance explained on the complete
    training data has to lie between 95% and 100%.
    """
    # the model and its checkpoints are written to a temporary directory
    model_dir = str(tmpdir.mkdir("model"))

    # synthetic low rank dataset
    rank = 3
    lrData = LowRank(rank=rank, M_train=[500, 100], M_test=[200, 100])

    # one prior per matrix dimension, block-wise cross-validation
    factor_priors = [CenNormal(), CenNormal()]
    model = DECOMPOSE(
        model_dir,
        priors=factor_priors,
        n_components=rank,
        cv=Block(nFolds=(3, 3), foldNumber=3),
        dtype=np.float32,
    )

    # train the model
    U0 = model.fit_transform(lrData.training)

    # mask marking the test set, and the learned components
    testMask = model.testMask
    U1 = model.components_
    held_out = testMask.flatten()

    # variance explained on the held-out entries
    reconstruction = np.dot(U0.T, U1)
    held_out_residuals = (reconstruction - lrData.training).flatten()[held_out]
    held_out_truth = lrData.training.flatten()[held_out]
    testVarExpl = 1. - np.var(held_out_residuals) / np.var(held_out_truth)
    # NOTE(review): the held-out variance explained is only reported and
    # not asserted - confirm whether that is intended.
    print("testVarExpl", testVarExpl)

    # variance explained on the complete training data
    train_var_expl = lrData.var_expl_training((U0, U1))
    assert 0.95 <= train_var_expl <= 1.
def init(self, data: Tensor) -> None:
    """Create the initial noise distribution and the data masks.

    The noise distribution is a `CenNormal` whose `tau` is a one-element
    constant holding the stored initial value. In addition three boolean
    masks are derived from `data` and stored on the object:

    * observed mask: entries of `data` that are not NaN,
    * train mask: observed entries that are also selected by the negated
      cross-validation mask,
    * test mask: observed entries that are not part of the train mask.

    Arguments:
        data: Tensor the masks are computed from; NaN marks an
            unobserved entry.
    """
    initial_tau = self.__tauInit
    tau_dtype = self.__dtype
    dist_properties = self.__properties

    # a single tau value, wrapped in a constant tensor of length one
    tau_tensor = tf.constant([initial_tau], dtype=tau_dtype)
    self.__noiseDistribution = CenNormal(tau=tau_tensor,
                                         properties=dist_properties)

    # everything that is not NaN counts as observed
    is_observed = tf.logical_not(tf.is_nan(data))

    # negate the cross-validation mask and keep the result in a variable
    # named "trainMask"
    # NOTE(review): presumably `self.cv.mask` marks the held-out entries
    # - confirm against the CV implementation.
    cv_train = tf.logical_not(self.cv.mask(X=data))
    cv_train_var = tf.get_variable("trainMask", dtype=cv_train.dtype,
                                   initializer=cv_train)

    # unobserved entries belong neither to the train nor to the test set
    is_train = tf.logical_and(cv_train_var, is_observed)
    is_test = tf.logical_and(is_observed, tf.logical_not(is_train))

    self.__observedMask = is_observed
    self.__trainMask = is_train
    self.__testMask = is_test
def init(self, data: Tensor) -> None:
    """Create the initial noise distribution with an element-wise tau.

    The stored initial tau value is multiplied with a tensor of ones
    shaped like `data[0]`, which yields one tau per element of `data[0]`.
    A `CenNormal` distribution with that tau and the stored properties is
    kept as the noise distribution of this object.

    Arguments:
        data: Tensor whose first slice `data[0]` determines the shape of
            tau.
    """
    initial_tau = self.__tauInit
    dist_properties = self.__properties

    # TODO is using ones really useful
    tau_per_element = tf.ones_like(data[0]) * initial_tau
    self.__noiseDistribution = CenNormal(tau=tau_per_element,
                                         properties=dist_properties)