Example #1
def test_sklearn_tensor(tmpdir):
    """Tests the sk-learn interface of the tensor factorisation estimator.

    The test creates a `DECOMPOSE` object and applies its `fit_transform`
    method to some low rank training data. The learned filter banks have
    to reconstruct the data very well. Then unseen test data is transformed
    into the learned basis. The test data has to be recovered from the
    transformed representation.
    """
    # create temporary directory where the model and its checkpoints are stored
    modelDirectory = str(tmpdir.mkdir("model"))

    # create a synthetic low rank dataset
    K, M_train, M_test = 3, [500, 100, 50], [500, 100, 50]
    lrData = LowRank(rank=K, M_train=M_train, M_test=M_test)

    # instantiate a model
    priors, K, dtype = [CenNormal(), CenNormal(), CenNormal()], K, np.float32
    model = DECOMPOSE(modelDirectory,
                      priors=priors,
                      n_components=K,
                      dtype=dtype)

    # train the model
    U0 = model.fit_transform(lrData.training)

    # check whether variance explained is between 0.95 and 1.
    U1, U2 = model.components_
    assert (0.95 <= lrData.var_expl_training((U0, U1, U2)) <= 1.)

    # transform test data
    transformModelDirectory = str(tmpdir.mkdir("transformModel"))
    U0test = model.transform(transformModelDirectory=transformModelDirectory,
                             X=lrData.test)
    assert (0.95 <= lrData.var_expl_test((U0test, U1, U2)) <= 1.)
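
For orientation, here is a minimal NumPy sketch of the variance-explained criterion these tests assert on. The einsum reconstruction mirrors the one in Example #3 below; the helper name `var_expl` is hypothetical, and the library's `var_expl_training`/`var_expl_test` are only assumed to compute something equivalent.

import numpy as np

def var_expl(data, factors):
    # reconstruct the tensor from the rank-K filter banks (U0, U1, U2),
    # each of shape (K, M_i), then report 1 - Var(residuals) / Var(data)
    recons = np.einsum("ka,kb,kc->abc", *factors)
    residuals = data - recons
    return 1. - np.var(residuals) / np.var(data)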
Example #2
def init(self, data: Tensor) -> None:
    # initialise the noise model with a single, homogeneous precision tau
    tau = self.__tauInit
    dtype = self.__dtype
    properties = self.__properties
    noiseDistribution = CenNormal(tau=tf.constant([tau], dtype=dtype),
                                  properties=properties)
    self.__noiseDistribution = noiseDistribution
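
`CenNormal` evidently parameterises a centred (zero-mean) normal by its precision `tau`. A throwaway NumPy sketch of that density, inferred from the constructor call above rather than taken from the library:

import numpy as np

def cen_normal_pdf(x, tau):
    # zero-mean normal with precision tau, i.e. variance 1/tau
    return np.sqrt(tau / (2. * np.pi)) * np.exp(-0.5 * tau * x**2)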
Example #3
def test_sklearn_cv(tmpdir):
    """Tests the sk-learn interface of the tensor factorisation estimator.

    The test creates a `DECOMPOSE` object and applies its `fit_transform`
    method to some low rank training data. The learned filter banks have
    to reconstruct the data very well. Then unseen test data is transformed
    into the learned basis. The test data has to be recoverd from the
    transformed representation.
    """
    # create temporary directory where the model and its checkpoints are stored
    modelDirectory = str(tmpdir.mkdir("model"))

    # create a synthetic low rank dataset
    K, M_train, M_test = 3, [30, 100, 150], [200, 100, 150]
    lrData = LowRank(rank=K, M_train=M_train, M_test=M_test)

    # instantiate a model
    priors, K, dtype = [CenNormal(), CenNormal(), CenNormal()], K, np.float32
    model = DECOMPOSE(modelDirectory, priors=priors, n_components=K,
                      isFullyObserved=False,
                      cv=Block(nFolds=(2, 3, 3), foldNumber=3), dtype=dtype)

    # mark 20% of the elements as unobserved
    data = lrData.training.copy()
    r = np.random.random(data.shape) > 0.8
    data[r] = np.nan

    # train the model
    U0 = model.fit_transform(data)

    # get mask marking the test set
    testMask = model.testMask

    # check whether variance explained is between 0.95 and 1.
    U1, U2 = model.components_
    testIndexes = testMask.flatten()
    recons = np.einsum("ka,kb,kc->abc", U0, U1, U2)
    testResiduals = (recons - lrData.training).flatten()[testIndexes]
    testData = lrData.training.flatten()[testIndexes]
    testVarExpl = 1. - np.var(testResiduals)/np.var(testData)
    assert (0.95 <= testVarExpl <= 1.)
    assert (0.95 <= lrData.var_expl_training((U0, U1, U2)) <= 1.)
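
`Block(nFolds=(2, 3, 3), foldNumber=3)` evidently carves the tensor into a grid of axis-aligned blocks and holds one block out as the test fold. The sketch below shows one plausible reading of those semantics; `block_test_mask` and the fold enumeration order are assumptions, not the library's actual implementation.

import itertools
import numpy as np

def block_test_mask(shape, nFolds, foldNumber):
    # split every axis d into nFolds[d] contiguous index blocks and mark
    # the block combination with index foldNumber as the held-out fold
    axisBlocks = [np.array_split(np.arange(m), n)
                  for m, n in zip(shape, nFolds)]
    mask = np.zeros(shape, dtype=bool)
    for i, blocks in enumerate(itertools.product(*axisBlocks)):
        if i == foldNumber:
            mask[np.ix_(*blocks)] = True
    return mask

mask = block_test_mask((30, 100, 150), nFolds=(2, 3, 3), foldNumber=3)
print(mask.mean())  # fraction of entries held out, roughly 1/18 here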
Example #4
def __init__(self,
             modelDirectory: str,
             priors: Tuple[Distribution, ...] = (CenNormal(), CenNormal()),
             n_components: int = 3,
             isFullyObserved: bool = True,
             dtype: type = np.float32,
             maxIterations: int = 100000,
             cv: CV = None,
             noiseUniformity: NoiseUniformity = HOMOGENEOUS,
             stopCriterionInit: StopCriterion = LlhStall(100),
             stopCriterionEM: StopCriterion = LlhStall(100),
             stopCriterionBCD: StopCriterion = LlhImprovementThreshold(.1),
             device: str = "/cpu:0") -> None:
    self.__isFullyObserved = isFullyObserved
    self.__maxIterations = maxIterations
    self.__n_components = n_components
    self.__priors = priors
    self.__dtype = dtype
    self.__cv = cv
    self.__modelDirectory = modelDirectory
    self.__device = device
    self.__noiseUniformity = noiseUniformity
    self.__stopCriterionInit = stopCriterionInit
    self.__stopCriterionEM = stopCriterionEM
    self.__stopCriterionBCD = stopCriterionBCD
    tefa = TensorFactorisation.getEstimator(
        priors=priors,
        K=self.n_components,
        isFullyObserved=isFullyObserved,
        dtype=tf.as_dtype(dtype),
        path=modelDirectory,
        noiseUniformity=noiseUniformity,
        cv=cv,
        stopCriterionInit=stopCriterionInit,
        stopCriterionEM=stopCriterionEM,
        stopCriterionBCD=stopCriterionBCD,
        device=self.__device)
    self.__tefa = tefa
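
A minimal instantiation sketch using the defaults above. The import path and the synthetic stand-in data are assumptions made for illustration; with the default pair of `CenNormal` priors the model factorises a two-mode tensor.

import numpy as np
from decompose.sklearn import DECOMPOSE  # import path is an assumption

X = np.random.randn(500, 100).astype(np.float32)  # stand-in training data
model = DECOMPOSE("/tmp/model", n_components=3)   # two CenNormal priors by default
U0 = model.fit_transform(X)  # first filter bank, assumed shape (3, 500)
U1 = model.components_       # remaining filter bank, cf. Example #5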
Example #5
def test_sklearn_cv(tmpdir):
    """Tests the sk-learn interface of the tensor factorisation estimator.

    The test creates a `DECOMPOSE` object and applies its `fit_transform`
    method to some low rank training data. The learned filter banks have
    to reconstruct the data very well. Then unseen test data is transformed
    into the learned basis. The test data has to be recoverd from the
    transformed representation.
    """
    # create temporary directory where the model and its checkpoints are stored
    modelDirectory = str(tmpdir.mkdir("model"))

    # create a synthetic low rank dataset
    K, M_train, M_test = 3, [500, 100], [200, 100]
    lrData = LowRank(rank=K, M_train=M_train, M_test=M_test)

    # instantiate a model
    priors, K, dtype = [CenNormal(), CenNormal()], K, np.float32
    model = DECOMPOSE(modelDirectory,
                      priors=priors,
                      n_components=K,
                      cv=Block(nFolds=(3, 3), foldNumber=3),
                      dtype=dtype)

    # train the model
    U0 = model.fit_transform(lrData.training)

    # get mask marking the test set
    testMask = model.testMask

    # check whether variance explained is between 0.95 and 1.
    U1 = model.components_
    testIndexes = testMask.flatten()
    testResiduals = (np.dot(U0.T, U1) - lrData.training).flatten()[testIndexes]
    testData = lrData.training.flatten()[testIndexes]
    testVarExpl = 1. - np.var(testResiduals) / np.var(testData)
    print("testVarExpl", testVarExpl)
    assert (0.95 <= lrData.var_expl_training((U0, U1)) <= 1.)
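
In the two-mode case the reconstruction `np.dot(U0.T, U1)` is the matrix special case of the einsum used in Example #3; the following lines only make that correspondence explicit:

import numpy as np

K, M0, M1 = 3, 500, 100
U0, U1 = np.random.randn(K, M0), np.random.randn(K, M1)
assert np.allclose(np.einsum("ka,kb->ab", U0, U1), np.dot(U0.T, U1))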
Example #6
def init(self, data: Tensor) -> None:
    tau = self.__tauInit
    dtype = self.__dtype
    properties = self.__properties
    noiseDistribution = CenNormal(tau=tf.constant([tau], dtype=dtype),
                                  properties=properties)
    self.__noiseDistribution = noiseDistribution
    # an element is observed iff it is not NaN
    observedMask = tf.logical_not(tf.is_nan(data))
    # training elements: observed and not held out by cross-validation
    trainMask = tf.logical_not(self.cv.mask(X=data))
    trainMask = tf.get_variable("trainMask",
                                dtype=trainMask.dtype,
                                initializer=trainMask)
    trainMask = tf.logical_and(trainMask, observedMask)
    # test elements: observed but held out by cross-validation
    testMask = tf.logical_and(observedMask, tf.logical_not(trainMask))
    self.__observedMask = observedMask
    self.__trainMask = trainMask
    self.__testMask = testMask
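
The same mask algebra in plain NumPy, with `cvMask` standing in for `self.cv.mask(X=data)` (assumed to mark the held-out fold, as in the Block sketch after Example #3):

import numpy as np

data = np.random.randn(4, 5)
data[0, 0] = np.nan                      # one unobserved entry
cvMask = np.zeros_like(data, dtype=bool)
cvMask[:, -1] = True                     # pretend the last column is the CV fold

observedMask = ~np.isnan(data)           # present entries
trainMask = ~cvMask & observedMask       # observed and not held out
testMask = observedMask & ~trainMask     # observed but held out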
Example #7
def init(self, data: Tensor) -> None:
    tau = self.__tauInit
    properties = self.__properties
    tau = tf.ones_like(data[0]) * tau  # TODO is using ones really useful
    noiseDistribution = CenNormal(tau=tau, properties=properties)
    self.__noiseDistribution = noiseDistribution
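
Contrast with Example #2: there a single shared precision is created, while here one precision per entry of a data row is used, presumably to allow heterogeneous noise levels (compare the `noiseUniformity` parameter in Example #4). A NumPy analogue of the two initialisations:

import numpy as np

tauInit = 0.1
row = np.zeros(100, dtype=np.float32)    # stand-in for data[0]
tauScalar = np.array([tauInit])          # Example #2: one shared precision
tauVector = np.ones_like(row) * tauInit  # Example #7: one precision per column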