Exemplo n.º 1
0
    def fit(self, data):
        """ Fit the iONMF model.

        The data sources are concatenated column-wise, in sorted key order,
        into a single matrix that is factorized with ``onmf``. The first
        returned factor is stored as ``self.coef_``; the second one is split
        back by columns into one block per data source and stored in
        ``self.basis_`` under the corresponding key.

        Model attributes are assigned only after the input has been validated
        and the factorization has finished, so a failed call does not leave a
        previously fitted model in a half-updated state.

        Parameters
        ----------

        data: dictionary of array describing samples with multiple data sources.
        data = {
            "data_source_1": X_1 array [n_samples, n_features_1],
            "data_source_2": X_2 array [n_samples, n_features_2],
            ...
            "data_source_N": X_N array [n_samples, n_features_N],
        }
        Data sources must match in the number of rows.
        At least one data source is required.

        Raises
        ------

        ValueError
            If ``data`` is empty or the matrices differ in the number of rows.
        """

        if not data:
            raise ValueError("At least one data source is required!")

        # Sorted keys fix the column order of the stacked matrix, so the
        # per-source slices below are reproducible.
        keys   = sorted(data.keys())
        widths = [data[ky].shape[1] for ky in keys]
        n_rows = data[keys[0]].shape[0]
        if any(data[ky].shape[0] != n_rows for ky in keys):
            raise ValueError("The number of rows must match for all matrices!")

        # Fill training data matrix (one column block per data source)
        X = zeros((n_rows, sum(widths)))
        t = 0
        for ny, ky in zip(widths, keys):
            X[:, t:t+ny] = data[ky]
            t += ny

        # Run factorization
        W, H = onmf(X, rank=self.rank,
                    max_iter=self.max_iter,
                    alpha=self.alpha)

        # Split H back into per-source column blocks, mirroring the layout
        # used to build X above.
        basis = {}
        t     = 0
        for ny, ky in zip(widths, keys):
            basis[ky] = H[:, t:t+ny]
            t += ny

        # Set model variables only now that everything has succeeded
        self.keys_        = keys
        self.n_           = widths
        self.m            = n_rows
        self.coef_        = W
        self.basis_       = basis
        self.instantiated = True
Exemplo n.º 2
0
    def predict(self, data_test):
        """ Predict the values for test samples based on a (non-empty)
        subset of available data sources.

        The provided test matrices and the matching blocks of
        ``self.basis_`` are concatenated column-wise (in sorted key order)
        and passed to ``onmf`` to obtain ``W_test``. Every training data
        source that is missing from ``data_test`` is then predicted as
        ``W_test.dot(self.basis_[key])``.

        Parameters
        ----------

        data_test: dictionary of array describing samples with multiple data sources.
        data_test = {
            "data_source_1": X_1 array [n_samples, n_features_1],
            "data_source_2": X_2 array [n_samples, n_features_2],
            ...
            "data_source_N": X_N array [n_samples, n_features_N],
        }
        Test data sources must match in the number of rows.
        Dictionary keys must match keys in the training data.

        Returns
        -------

        predictions: dictionary with one predicted matrix for each training
        data source that is not present in ``data_test``. The dictionary is
        empty when ``data_test`` contains all training data sources.

        Raises
        ------

        ValueError
            If the model has not been fitted, ``data_test`` is empty,
            ``data_test`` contains keys unknown to the model, or the test
            matrices differ in the number of rows.
        """

        if not self.instantiated:
            raise ValueError("Run the method iONMF.fit first!")

        # At least one observed data source is needed to infer W_test.
        if not data_test:
            raise ValueError("Test data dictionary must not be empty!")

        keys_test = sorted(data_test.keys())
        if set(keys_test) - set(self.keys_):
            raise ValueError("Test data dictionary contains unknown keys!")

        m_test = data_test[keys_test[0]].shape[0]
        if any(data_test[ky].shape[0] != m_test for ky in keys_test):
            raise ValueError("The number of rows must match for all matrices!")

        # Stack data and basis blocks in the same key order so that their
        # columns line up.
        X_test = hstack([data_test[ky] for ky in keys_test])
        H_test = hstack([self.basis_[ky] for ky in keys_test])

        # Infer values of W for test samples based on a non-empty subset of
        #   observed data sources (matrices)
        # NOTE(review): max_iter is not forwarded here (unlike in fit), so
        #   onmf presumably falls back to its own default - confirm intended.
        W_test, _ = onmf(X_test, rank=self.rank, alpha=self.alpha,
                         H_init=H_test)

        # Predict all remaining matrices
        remaining_keys = set(self.keys_) - set(keys_test)
        predictions = {ky: W_test.dot(self.basis_[ky])
                       for ky in remaining_keys}

        return predictions