Example #1
 def test_smooth(self):
     M = 101
     q1 = np.zeros((M, 1))
     q1[:, 0] = np.sin(np.linspace(0, 2 * np.pi, M)).T
     q1a = fs.smooth_data(q1, 1)
     q1b = fs.smooth_data(q1, 1)
     self.assertAlmostEqual(sum(q1a.flatten() - q1b.flatten()), 0)
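A minimal standalone sketch of the same call outside the test harness, assuming fs is the fdasrsf package imported under that alias (as these snippets suggest) and that the second argument of smooth_data is the number of box-filter passes, as in the sparam parameter of Example #4:

import numpy as np
import fdasrsf as fs

M = 101
f = np.zeros((M, 1))
f[:, 0] = np.sin(np.linspace(0, 2 * np.pi, M))

# one pass of the box filter, mirroring the test above
f_smooth = fs.smooth_data(f, 1)
print(f_smooth.shape)  # expected to stay (101, 1)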
Example #2
    def predict(self, newdata=None, parallel=True):
        """
        This function performs prediction with the regression model on new data if provided, otherwise on the data currently stored in the object
        Usage:  obj.predict()
                obj.predict(newdata)

        :param newdata: dict containing new data for prediction (must contain the keys below; if None, prediction uses the stored training data)
        :type newdata: dict
        :param f: (M,N) matrix of functions
        :param time: vector of time points
        :param y: truth if available
        :param smooth: smooth data if needed
        :param sparam: number of times to run filter
        :param parallel: run prediction in parallel (default = True)
        """

        if newdata is not None:
            f = newdata['f']
            time = newdata['time']
            y = newdata['y']
            sparam = newdata['sparam']
            if newdata['smooth']:
                f = fs.smooth_data(f, sparam)

            n = f.shape[1]
            yhat1 = self.alpha + MapC_to_y(n, self.b, self.B, time, f,
                                           parallel)
            yhat = np.polyval(self.h, yhat1)

            if y is None:
                self.SSE = np.nan
            else:
                self.SSE = np.sum((y - yhat)**2)

            self.y_pred = yhat

        else:
            n = self.f.shape[1]
            yhat1 = self.alpha + MapC_to_y(n, self.b, self.B, self.time,
                                           self.f, parallel)
            yhat = np.polyval(self.h, yhat1)
            self.SSE = np.sum((self.y - yhat)**2)
            self.y_pred = yhat

        return
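A hedged usage sketch for this predict method. Here model stands for an already fitted regression object of the class above (its constructor and the calc_model call, as in Example #7, are not shown), and f_new and time are hypothetical arrays shaped as the docstring describes:

# hypothetical new observations: f_new is (M, N), time has length M
newdata = {
    'f': f_new,        # (M, N) matrix of functions
    'time': time,      # vector of time points
    'y': None,         # truth unknown, so SSE is set to np.nan
    'smooth': True,    # apply the box filter before predicting
    'sparam': 25,      # number of filter passes
}

model.predict(newdata, parallel=True)
print(model.y_pred)    # predicted responses
print(model.SSE)       # nan here, since y was None

# predict on the stored training data instead
model.predict()
print(model.SSE)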
Example #3
    def predict(self, newdata=None):
        """
        This function performs prediction with the regression model on new data if provided, otherwise on the data currently stored in the object
        Usage:  obj.predict()
                obj.predict(newdata)

        :param newdata: dict containing new data for prediction (must contain the keys below; if None, prediction uses the stored training data)
        :type newdata: dict
        :param f: (M,N) matrix of functions
        :param time: vector of time points
        :param y: truth if available
        :param smooth: smooth data if needed
        :param sparam: number of times to run filter
        """

        omethod = self.warp_data.method
        lam = self.warp_data.lam
        m = self.n_classes
        M = self.time.shape[0]

        if newdata is not None:
            f = newdata['f']
            time = newdata['time']
            y = newdata['y']
            sparam = newdata['sparam']
            if newdata['smooth']:
                f = fs.smooth_data(f, sparam)

            q1 = fs.f_to_srsf(f, time)
            n = q1.shape[1]
            self.y_pred = np.zeros((n, m))
            mq = self.warp_data.mqn
            fn = np.zeros((M, n))
            qn = np.zeros((M, n))
            gam = np.zeros((M, n))
            for ii in range(0, n):
                gam[:, ii] = uf.optimum_reparam(mq, time, q1[:, ii], omethod, lam)
                fn[:, ii] = uf.warp_f_gamma(time, f[:, ii], gam[:, ii])
                qn[:, ii] = uf.f_to_srsf(fn[:, ii], time)

            m_new = np.sign(fn[self.pca.id, :]) * np.sqrt(
                np.abs(fn[self.pca.id, :]))
            qn1 = np.vstack((qn, m_new))
            U = self.pca.U
            no = U.shape[1]

            if self.pca.__class__.__name__ == 'fdajpca':
                C = self.pca.C
                TT = self.time.shape[0]
                mu_g = self.pca.mu_g
                mu_psi = self.pca.mu_psi
                vec = np.zeros((M, n))
                psi = np.zeros((TT, n))
                binsize = np.mean(np.diff(self.time))
                for i in range(0, n):
                    psi[:, i] = np.sqrt(np.gradient(gam[:, i], binsize))
                    vec[:, i] = geo.inv_exp_map(mu_psi, psi[:, i])

                g = np.vstack((qn1, C * vec))
                a = np.zeros((n, no))
                for i in range(0, n):
                    for j in range(0, no):
                        tmp = (g[:, i] - mu_g)
                        a[i, j] = dot(tmp.T, U[:, j])

            elif self.pca.__class__.__name__ == 'fdavpca':
                a = np.zeros((n, no))
                for i in range(0, n):
                    for j in range(0, no):
                        tmp = (qn1[:, i] - self.pca.mqn)
                        a[i, j] = dot(tmp.T, U[:, j])

            elif self.pca.__class__.__name__ == 'fdahpca':
                a = np.zeros((n, no))
                mu_psi = self.pca.psi_mu
                vec = np.zeros((M, n))
                TT = self.time.shape[0]
                psi = np.zeros((TT, n))
                binsize = np.mean(np.diff(self.time))
                for i in range(0, n):
                    psi[:, i] = np.sqrt(np.gradient(gam[:, i], binsize))
                    vec[:, i] = geo.inv_exp_map(mu_psi, psi[:, i])

                vm = self.pca.vec.mean(axis=1)

                for i in range(0, n):
                    for j in range(0, no):
                        a[i, j] = np.sum(dot(vec[:, i] - vm, U[:, j]))
            else:
                raise Exception('Invalid fPCA Method')

            for ii in range(0, n):
                for jj in range(0, m):
                    self.y_pred[ii, jj] = self.alpha[jj] + np.sum(
                        a[ii, :] * self.b[:, jj])

            if y is None:
                self.y_pred = rg.phi(self.y_pred.reshape((1, n * m)))
                self.y_pred = self.y_pred.reshape((n, m))
                self.y_labels = np.argmax(self.y_pred, axis=1)
                self.PC = np.nan
            else:
                self.y_pred = rg.phi(self.y_pred.reshape((1, n * m)))
                self.y_pred = self.y_pred.reshape((n, m))
                self.y_labels = np.argmax(self.y_pred, axis=1)
                self.PC = np.zeros(m)
                cls_set = np.arange(0, m)
                for ii in range(0, m):
                    cls_sub = np.setdiff1d(cls_set, ii)
                    TP = np.sum(y[self.y_labels == ii] == ii)
                    FP = np.sum(y[np.in1d(self.y_labels, cls_sub)] == ii)
                    TN = np.sum(y[np.in1d(self.y_labels, cls_sub)] ==
                                self.y_labels[np.in1d(self.y_labels, cls_sub)])
                    FN = np.sum(np.in1d(y[self.y_labels == ii], cls_sub))
                    self.PC[ii] = (TP + TN) / (TP + FP + FN + TN)

                self.PCo = np.sum(y == self.y_labels) / self.y_labels.shape[0]
        else:
            n = self.pca.coef.shape[0]
            self.y_pred = np.zeros((n, m))
            for ii in range(0, n):
                for jj in range(0, m):
                    self.y_pred[ii, jj] = self.alpha[jj] + np.sum(
                        self.pca.coef[ii, :] * self.b[:, jj])

            self.y_pred = rg.phi(self.y_pred.reshape((1, n * m)))
            self.y_pred = self.y_pred.reshape((n, m))
            self.y_labels = np.argmax(self.y_pred, axis=1)
            self.PC = np.zeros(m)
            cls_set = np.arange(0, m)
            for ii in range(0, m):
                cls_sub = np.setdiff1d(cls_set, ii)
                TP = np.sum(self.y[self.y_labels == ii] == ii)
                FP = np.sum(self.y[np.in1d(self.y_labels, cls_sub)] == ii)
                TN = np.sum(self.y[np.in1d(self.y_labels, cls_sub)] ==
                            self.y_labels[np.in1d(self.y_labels, cls_sub)])
                FN = np.sum(np.in1d(self.y[self.y_labels == ii], cls_sub))
                self.PC[ii] = (TP + TN) / (TP + FP + FN + TN)

            self.PCo = np.sum(self.y == self.y_labels) / self.y_labels.shape[0]

        return
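A hedged usage sketch for the multinomial classifier's predict method above. model is assumed to be a fitted instance of this class (fit as in Example #5); f_test, time, and the integer labels y_test are hypothetical placeholders:

# hypothetical held-out functions with true labels in {0, ..., n_classes - 1}
newdata = {
    'f': f_test,       # (M, N) matrix of functions
    'time': time,      # vector of time points
    'y': y_test,       # truth supplied, so PC and PCo are computed
    'smooth': False,
    'sparam': 25,
}

model.predict(newdata)
print(model.y_labels)  # argmax class for each test function
print(model.PC)        # per-class probability of correct classification
print(model.PCo)       # overall classification rate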
Example #4
    def calc_model(self,
                   pca_method="combined",
                   no=5,
                   smooth_data=False,
                   sparam=25,
                   parallel=False,
                   C=None):
        """
        This function identifies a regression model with phase-variability
        using elastic pca

        :param pca_method: string specifying the pca method (options = "combined",
                        "vert", or "horiz", default = "combined")
        :param no: scalar specifying the number of principal components (default = 5)
        :param smooth_data: smooth data using box filter (default = F)
        :param sparam: number of times to apply box filter (default = 25)
        :param parallel: run in parallel (default = F)
        :param C: scale balance parameter for combined method (default = None)
        """

        if smooth_data:
            self.f = fs.smooth_data(self.f, sparam)

        N1 = self.f.shape[1]

        # Align Data
        self.warp_data = fs.fdawarp(self.f, self.time)
        self.warp_data.srsf_align(parallel=parallel)

        # Calculate PCA
        if pca_method == 'combined':
            out_pca = fpca.fdajpca(self.warp_data)
        elif pca_method == 'vert':
            out_pca = fpca.fdavpca(self.warp_data)
        elif pca_method == 'horiz':
            out_pca = fpca.fdahpca(self.warp_data)
        else:
            raise Exception('Invalid fPCA Method')
        out_pca.calc_fpca(no)

        # OLS using PCA basis
        lam = 0
        R = 0
        Phi = np.ones((N1, no + 1))
        Phi[:, 1:(no + 1)] = out_pca.coef
        xx = dot(Phi.T, Phi)
        inv_xx = inv(xx + lam * R)
        xy = dot(Phi.T, self.y)
        b = dot(inv_xx, xy)
        alpha = b[0]
        b = b[1:no + 1]

        # compute the SSE
        int_X = np.zeros(N1)
        for ii in range(0, N1):
            int_X[ii] = np.sum(out_pca.coef[ii, :] * b)

        SSE = np.sum((self.y - alpha - int_X)**2)

        self.alpha = alpha
        self.b = b
        self.pca = out_pca
        self.SSE = SSE
        self.pca_method = pca_method

        return
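A hedged fitting sketch for calc_model above. The constructor is not part of this snippet; model is assumed to be an instance already holding f (M x N), y (length N), and time (length M), since those are the attributes the method reads:

model.calc_model(pca_method="combined",  # joint amplitude/phase fPCA basis
                 no=5,                   # number of principal components
                 smooth_data=True,       # box-filter the functions first
                 sparam=25,
                 parallel=False)

print(model.alpha)  # intercept
print(model.b)      # regression coefficients in the fPCA basis
print(model.SSE)    # training sum of squared errors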
Example #5
    def calc_model(self,
                   pca_method="combined",
                   no=5,
                   smooth_data=False,
                   sparam=25,
                   parallel=False):
        """
        This function identifies a logistic regression model with phase-variability
        using elastic pca

        :param f: numpy ndarray of shape (M,N) of N functions with M samples
        :param y: numpy array of N responses
        :param time: vector of size M describing the sample points
        :param pca_method: string specifying the pca method (options = "combined",
                        "vert", or "horiz", default = "combined")
        :param no: scalar specifying the number of principal components (default = 5)
        :param smooth_data: smooth data using box filter (default = F)
        :param sparam: number of times to apply box filter (default = 25)
        :param parallel: run model in parallel (default = F)
        :type f: np.ndarray
        :type time: np.ndarray
        """

        if smooth_data:
            self.f = fs.smooth_data(self.f, sparam)

        N1 = self.f.shape[1]

        # Align Data
        self.warp_data = fs.fdawarp(self.f, self.time)
        self.warp_data.srsf_align(parallel=parallel)

        # Calculate PCA
        if pca_method == 'combined':
            out_pca = fpca.fdajpca(self.warp_data)
        elif pca_method == 'vert':
            out_pca = fpca.fdavpca(self.warp_data)
        elif pca_method == 'horiz':
            out_pca = fpca.fdahpca(self.warp_data)
        else:
            raise Exception('Invalid fPCA Method')
        out_pca.calc_fpca(no)

        # OLS using PCA basis
        lam = 0
        R = 0
        Phi = np.ones((N1, no + 1))
        Phi[:, 1:(no + 1)] = out_pca.coef
        # Find alpha and beta using l_bfgs
        b0 = np.zeros(self.n_classes * (no + 1))
        out = fmin_l_bfgs_b(rg.mlogit_loss,
                            b0,
                            fprime=rg.mlogit_gradient,
                            args=(Phi, self.Y),
                            pgtol=1e-10,
                            maxiter=200,
                            maxfun=250,
                            factr=1e-30)

        b = out[0]
        B0 = b.reshape(no + 1, self.n_classes)
        alpha = B0[0, :]

        # compute the Loss
        LL = rg.mlogit_loss(b, Phi, self.Y)  # self.Y is the class-indicator matrix used in the fit above

        b = B0[1:no + 1, :]

        self.alpha = alpha
        self.b = b
        self.pca = out_pca
        self.LL = LL
        self.pca_method = pca_method

        return
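A hedged fitting sketch for the multinomial logistic model above, under the same assumption that model is a constructed instance holding f, the integer labels y, the indicator matrix Y, time, and n_classes:

model.calc_model(pca_method="horiz",  # phase-only fPCA basis
                 no=5,
                 smooth_data=False)

print(model.alpha)  # per-class intercepts, shape (n_classes,)
print(model.b)      # coefficient matrix, shape (no, n_classes)
print(model.LL)     # multinomial logistic loss at the fitted coefficients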
Example #6
    def predict(self, newdata=None):
        """
        This function performs prediction with the regression model on new data if provided, otherwise on the data currently stored in the object
        Usage:  obj.predict()
                obj.predict(newdata)

        :param newdata: dict containing new data for prediction (must contain the keys below; if None, prediction uses the stored training data)
        :type newdata: dict
        :param f: (M,N) matrix of functions
        :param time: vector of time points
        :param y: truth if available
        :param smooth: smooth data if needed
        :param sparam: number of times to run filter
        """

        omethod = self.warp_data.method
        lam = self.warp_data.lam
        M = self.time.shape[0]

        if newdata is not None:
            f = newdata['f']
            time = newdata['time']
            y = newdata['y']
            if newdata['smooth']:
                sparam = newdata['sparam']
                f = fs.smooth_data(f,sparam)
            
            q1 = fs.f_to_srsf(f,time)
            n = q1.shape[1]
            self.y_pred = np.zeros(n)
            mq = self.warp_data.mqn
            fn = np.zeros((M,n))
            qn = np.zeros((M,n))
            gam = np.zeros((M,n))
            for ii in range(0,n):
                gam[:,ii] = uf.optimum_reparam(mq,time,q1[:,ii],omethod,lam)
                fn[:,ii] = uf.warp_f_gamma(time,f[:,ii],gam[:,ii])
                qn[:,ii] = uf.f_to_srsf(fn[:,ii],time)
            
            U = self.pca.U
            no = U.shape[1]

            if self.pca.__class__.__name__ == 'fdajpca':
                m_new = np.sign(fn[self.pca.id,:])*np.sqrt(np.abs(fn[self.pca.id,:]))
                qn1 = np.vstack((qn, m_new))
                C = self.pca.C
                TT = self.time.shape[0]
                mu_g = self.pca.mu_g
                mu_psi = self.pca.mu_psi
                vec = np.zeros((M,n))
                psi = np.zeros((TT,n))
                binsize = np.mean(np.diff(self.time))
                for i in range(0,n):
                    psi[:,i] = np.sqrt(np.gradient(gam[:,i],binsize))
                    out, theta = geo.inv_exp_map(mu_psi, psi[:,i])
                    vec[:,i] = out
                
                g = np.vstack((qn1, C*vec))
                a = np.zeros((n,no))
                for i in range(0,n):
                    for j in range(0,no):
                        tmp = (g[:,i]-mu_g)
                        a[i,j] = np.dot(tmp.T, U[:,j])

            elif self.pca.__class__.__name__ == 'fdavpca':
                m_new = np.sign(fn[self.pca.id,:])*np.sqrt(np.abs(fn[self.pca.id,:]))
                qn1 = np.vstack((qn, m_new))
                a = np.zeros((n,no))
                for i in range(0,n):
                    for j in range(0,no):
                        tmp = (qn1[:,i]-self.pca.mqn)
                        a[i,j] = np.dot(tmp.T, U[:,j])

            elif self.pca.__class__.__name__ == 'fdahpca':
                a = np.zeros((n,no))
                mu_psi = self.pca.psi_mu
                vec = np.zeros((M,n))
                TT = self.time.shape[0]
                psi = np.zeros((TT,n))
                binsize = np.mean(np.diff(self.time))
                for i in range(0,n):
                    psi[:,i] = np.sqrt(np.gradient(gam[:,i],binsize))
                    out, theta = geo.inv_exp_map(mu_psi, psi[:,i])
                    vec[:,i] = out
                
                vm = self.pca.vec.mean(axis=1)

                for i in range(0,n):
                    for j in range(0,no):
                        a[i,j] = np.sum(np.dot(vec[:,i]-vm,U[:,j]))
            else: 
                raise Exception('Invalid fPCA Method')

            for ii in range(0,n):
                self.y_pred[ii] = self.alpha + np.dot(a[ii,:],self.b)
            
            if y is None:
                self.SSE = np.nan
            else:
                self.SSE = np.sum((y-self.y_pred)**2)
        else:
            n = self.pca.coef.shape[0]
            self.y_pred = np.zeros(n)
            for ii in range(0,n):
                self.y_pred[ii] = self.alpha + np.dot(self.pca.coef[ii,:],self.b)
            
            self.SSE = np.sum((self.y-self.y_pred)**2)

        return
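A hedged usage sketch for this scalar-response predict method, assuming model was fitted by a calc_model call like the one in Example #4 and that f_test, time, and y_test are hypothetical held-out data:

newdata = {
    'f': f_test,     # (M, N) matrix of functions
    'time': time,    # vector of time points
    'y': y_test,     # true responses, so SSE is computed
    'smooth': True,
    'sparam': 25,
}

model.predict(newdata)
print(model.y_pred)
print(model.SSE)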
Example #7
    def calc_model(self,
                   link='linear',
                   B=None,
                   lam=0,
                   df=20,
                   max_itr=20,
                   smooth_data=False,
                   sparam=25,
                   parallel=False):
        """
        This function identifies a regression model with phase-variability
        using elastic pca

        :param link: string of link function ('linear', 'quadratic', 'cubic')
        :param B: optional matrix describing Basis elements
        :param lam: regularization parameter (default 0)
        :param df: number of degrees of freedom B-spline (default 20)
        :param max_itr: maximum number of iterations (default 20)
        :param smooth_data: smooth data using box filter (default = F)
        :param sparam: number of times to apply box filter (default = 25)
        :param parallel: run in parallel (default = F)
        """
        if smooth_data:
            self.f = fs.smooth_data(self.f, sparam)

        print("Link: %s" % link)
        print("Lambda: %5.1f" % lam)

        self.lam = lam
        self.link = link

        # Create B-Spline Basis if none provided
        if B is None:
            B = bs(self.time, df=df, degree=4, include_intercept=True)
        Nb = B.shape[1]
        self.B = B

        n = self.f.shape[1]

        print("Initializing")
        b0 = rand(Nb + 1)
        out = minimize(MyLogLikelihoodFn,
                       b0,
                       args=(self.y, self.B, self.time, self.f, parallel),
                       method="SLSQP")

        a = out.x

        if self.link == 'linear':
            h1, c_hat, cost = Amplitude_Index(self.f, self.time, self.B,
                                              self.y, max_itr, a, 1, parallel)
            yhat1 = c_hat[0] + MapC_to_y(n, c_hat[1:], self.B, self.time,
                                         self.f, parallel)
            yhat = np.polyval(h1, yhat1)
        elif self.link == 'quadratic':
            h1, c_hat, cost = Amplitude_Index(self.f, self.time, self.B,
                                              self.y, max_itr, a, 2, parallel)
            yhat1 = c_hat[0] + MapC_to_y(n, c_hat[1:], self.B, self.time,
                                         self.f, parallel)
            yhat = np.polyval(h1, yhat1)
        elif self.link == 'cubic':
            h1, c_hat, cost = Amplitude_Index(self.f, self.time, self.B,
                                              self.y, max_itr, a, 3, parallel)
            yhat1 = c_hat[0] + MapC_to_y(n, c_hat[1:], self.B, self.time,
                                         self.f, parallel)
            yhat = np.polyval(h1, yhat1)
        else:
            raise Exception('Invalid Link')

        tmp = (self.y - yhat)**2
        self.SSE = tmp.sum()
        self.h = h1
        self.alpha = c_hat[0]
        self.b = c_hat[1:]

        return
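A hedged fitting sketch for calc_model above; model is assumed to be an instance already holding f, y, and time (constructor not shown), and B is left as None so the B-spline basis is built internally from df:

model.calc_model(link='quadratic',  # quadratic polynomial link
                 df=20,             # B-spline degrees of freedom
                 max_itr=20,
                 smooth_data=False,
                 parallel=False)

print(model.h)      # fitted polynomial link coefficients (used with np.polyval)
print(model.alpha)  # intercept
print(model.b)      # basis coefficients
print(model.SSE)    # training sum of squared errors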