Пример #1
0
def csv2st(csvfile, headers=False, stubs=False, title=None):
    """Return a SimpleTable built from a comma-separated-values file.

    Parameters
    ----------
    csvfile : str
        Path of the CSV file to read.
    headers : bool or sequence of str, optional
        ``True``: the first row of the file holds the column headers.
        ``False`` (default): the table has no headers.
        Any other value is handed to ``SimpleTable`` unchanged.
    stubs : bool or sequence of str, optional
        ``True``: the first column of every row holds the row stub.
        ``False`` (default): the table has no stubs.
        Any other value is handed to ``SimpleTable`` unchanged.
    title : str, optional
        Table title.  Forwarded to ``SimpleTable`` only when it is not
        None, so the default call is unchanged.

    Returns
    -------
    SimpleTable

    Raises
    ------
    IOError
        If the file is empty when a header row is requested, if it holds
        no data rows, or if the data rows differ in length.

    Notes
    -----
    Blank lines in the file are skipped.
    """
    rows = []
    with open(csvfile, 'r') as fh:
        reader = csv.reader(fh)
        if headers is True:
            # A default keeps StopIteration from leaking out of this
            # function when the file is completely empty.
            headers = next(reader, None)
            if headers is None:
                raise IOError('CSV file is empty; no header row found.')
        elif headers is False:
            headers = ()
        if stubs is True:
            stubs = []
            for row in reader:
                if row:
                    stubs.append(row[0])
                    rows.append(row[1:])
        else:  # no stubs, or stubs provided by the caller
            rows.extend(row for row in reader if row)
            if stubs is False:
                stubs = ()
    if not rows:
        # Previously this surfaced as a bare IndexError on rows[0].
        raise IOError('CSV file contains no data rows.')
    ncols = len(rows[0])
    if any(len(row) != ncols for row in rows):
        raise IOError('All rows of CSV file must have same length.')
    if title is None:
        return SimpleTable(data=rows, headers=headers, stubs=stubs)
    return SimpleTable(data=rows, headers=headers, stubs=stubs, title=title)
Пример #2
0
def csv2st(csvfile, headers=False, stubs=False, title=None):
    """Read a comma-separated-values file into a SimpleTable.

    Parameters
    ----------
    csvfile : str
        Path of the CSV file to read.
    headers : bool or sequence of str, optional
        Set to ``True`` when the first row of the file contains the
        headers; ``False`` (default) means no headers.  A sequence of
        strings is passed on to ``SimpleTable`` as given.
    stubs : bool or sequence of str, optional
        Set to ``True`` when the first column contains the stubs;
        ``False`` (default) means no stubs.  A sequence of strings is
        passed on to ``SimpleTable`` as given.
    title : str, optional
        Table title; passed on to ``SimpleTable`` only if not None.

    Returns
    -------
    SimpleTable

    Raises
    ------
    IOError
        If a header row is requested from an empty file, if no data rows
        are found, or if the data rows do not all have the same length.

    Notes
    -----
    Empty rows (blank lines) are ignored.
    """
    rows = []
    with open(csvfile, 'r') as fh:
        reader = csv.reader(fh)
        if headers is True:
            # next() with a default: an empty file must not raise
            # StopIteration out of this function.
            headers = next(reader, None)
            if headers is None:
                raise IOError('CSV file is empty; no header row found.')
        elif headers is False:
            headers = ()
        if stubs is True:
            stubs = []
            for row in reader:
                if not row:
                    continue
                stubs.append(row[0])
                rows.append(row[1:])
        else:  # no stubs, or stubs provided by the caller
            for row in reader:
                if row:
                    rows.append(row)
            if stubs is False:
                stubs = ()
    if not rows:
        # Guard the rows[0] lookup below; it used to fail with IndexError.
        raise IOError('CSV file contains no data rows.')
    ncols = len(rows[0])
    if any(len(row) != ncols for row in rows):
        raise IOError('All rows of CSV file must have same length.')
    table_kwargs = dict(data=rows, headers=headers, stubs=stubs)
    if title is not None:
        table_kwargs['title'] = title
    return SimpleTable(**table_kwargs)
Пример #3
0
    def cv_loo(self, bw, func):
        r"""
        Evaluate the least-squares cross-validation objective built from
        leave-one-out estimates.

        Parameters
        ----------
        bw : array_like
            Vector of bandwidth values.
        func : callable
            Returns the estimator of g(x).  Can be either
            ``_est_loc_constant`` (local constant) or ``_est_loc_linear``
            (local linear).  It is called with the keyword arguments
            ``endog``, ``exog``, ``data_predict`` and ``W``; only element
            ``[0]`` of its return value is used.

        Returns
        -------
        L : float
            The value of the CV function.

        Notes
        -----
        This is the function minimized by compute_bw to obtain the optimal
        bandwidth.  For details see p.35 in [2].

        .. math:: CV(h)=n^{-1}\sum_{i=1}^{n}(Y_{i}-g_{-i}(X_{i}))^{2}

        where :math:`g_{-i}(X_{i})` is the leave-one-out estimator of g(X)
        and :math:`h` is the vector of bandwidths.
        """
        exog_folds = LeaveOneOut(self.exog)
        endog_folds = iter(LeaveOneOut(self.endog))
        weight_folds = iter(LeaveOneOut(self.W_in))
        cv_sum = 0
        for idx, exog_rest in enumerate(exog_folds):
            endog_rest = next(endog_folds)
            weights_rest = next(weight_folds)
            # exog and the prediction point are both passed negated.
            estimate = func(bw,
                            endog=endog_rest,
                            exog=-exog_rest,
                            data_predict=-self.exog[idx, :],
                            W=weights_rest)
            fitted = estimate[0]
            cv_sum += (self.endog[idx] - fitted)**2

        # Note: There might be a way to vectorize this. See p.72 in [1]
        return cv_sum / self.nobs
Пример #4
0
    def _compute_test_stat(self, u):
        """
        Compute the test statistic from the vector `u`.

        For each observation ``i`` the kernel products between the
        remaining observations and observation ``i`` are weighted by
        ``u[i] * u_j``.  The sum of these terms gives the statistic (see
        eq. 12.7 on p. 355 in [1]); the sum of their squares gives the
        variance term (see Theorem 12.1 on p. 356 in [1]).

        Parameters
        ----------
        u : array_like
            One value per observation (presumably residuals); indexed as
            ``u[i]`` and ``u[:, None]``.

        Returns
        -------
        T : float
            ``n * ival * sqrt(hp / S2)``, where ``hp`` is the product of
            the bandwidths selected by ``_get_type_pos(self.var_type)[0]``.
        """
        nobs = np.shape(u)[0]
        exog_folds = LeaveOneOut(self.exog)
        u_folds = iter(LeaveOneOut(u[:, None]))
        stat_sum = 0
        var_sum = 0
        for idx, exog_rest in enumerate(exog_folds):
            u_rest = np.squeeze(next(u_folds))
            # See Bootstrapping procedure on p. 357 in [1]
            kernel_vals = gpke(self.bw,
                               data=-exog_rest,
                               data_predict=-self.exog[idx, :],
                               var_type=self.var_type,
                               tosum=False)
            terms = u[idx] * u_rest * kernel_vals
            assert u_rest.shape == kernel_vals.shape
            stat_sum += terms.sum()  # See eq. 12.7 on p. 355 in [1]
            var_sum += (terms**2).sum()  # See Theorem 12.1 on p.356 in [1]
            assert np.size(stat_sum) == 1
            assert np.size(var_sum) == 1

        stat_sum *= 1. / (nobs * (nobs - 1))
        bw_positions = _get_type_pos(self.var_type)[0]
        bw_product = self.bw[bw_positions].prod()
        var_sum *= 2 * bw_product / (nobs * (nobs - 1))
        return nobs * stat_sum * np.sqrt(bw_product / var_sum)
Пример #5
0
    def _compute_test_stat(self, u):
        """
        Return the test statistic ``T`` computed from `u`.

        Loops over leave-one-out splits of ``self.exog`` and of `u`,
        accumulating the kernel-weighted cross products
        ``u[i] * u_j * K`` (eq. 12.7 on p. 355 in [1]) and their squares
        (Theorem 12.1 on p. 356 in [1]), then scales both sums by
        ``n * (n - 1)`` and combines them.

        Parameters
        ----------
        u : array_like
            One value per observation (presumably residuals).

        Returns
        -------
        T : float
            ``n * I * sqrt(hp / S2)`` with ``hp`` the product of the
            bandwidths at the positions given by
            ``_get_type_pos(self.var_type)[0]``.
        """
        n = np.shape(u)[0]
        loo_exog = LeaveOneOut(self.exog)
        loo_u = iter(LeaveOneOut(u[:, None]))
        stat = 0
        sq_sum = 0
        for i, x_rest in enumerate(loo_exog):
            u_j = next(loo_u)
            u_j = np.squeeze(u_j)
            x_i = -self.exog[i, :]
            # See Bootstrapping procedure on p. 357 in [1]
            K = gpke(self.bw, data=-x_rest, data_predict=x_i,
                     var_type=self.var_type, tosum=False)
            weighted = (u[i] * u_j * K)
            assert u_j.shape == K.shape
            stat += weighted.sum()  # See eq. 12.7 on p. 355 in [1]
            sq_sum += (weighted**2).sum()  # See Theorem 12.1 on p.356 in [1]
            assert np.size(stat) == 1
            assert np.size(sq_sum) == 1

        stat *= 1. / (n * (n - 1))
        selected = _get_type_pos(self.var_type)[0]
        hp = self.bw[selected].prod()
        sq_sum *= 2 * hp / (n * (n - 1))
        T = n * stat * np.sqrt(hp / sq_sum)
        return T
Пример #6
0
    def cv_loo(self, bw, func):
        r"""
        Cross-validation least-squares function based on the
        leave-one-out estimator.

        Parameters
        ----------
        bw : array_like
            Vector of bandwidth values.
        func : callable
            Returns the estimator of g(x).  Can be either
            ``_est_loc_constant`` (local constant) or ``_est_loc_linear``
            (local linear).  Element ``[0]`` of its return value is taken
            as the fitted value.

        Returns
        -------
        L : float
            The value of the CV function.

        Notes
        -----
        compute_bw minimizes this function to calculate the optimal value
        of bw.  For details see p.35 in [2].

        .. math:: CV(h)=n^{-1}\sum_{i=1}^{n}(Y_{i}-g_{-i}(X_{i}))^{2}

        where :math:`g_{-i}(X_{i})` is the leave-one-out estimator of g(X)
        and :math:`h` is the vector of bandwidths.
        """
        loo_exog = LeaveOneOut(self.exog)
        loo_endog = iter(LeaveOneOut(self.endog))
        loo_weights = iter(LeaveOneOut(self.W_in))
        total = 0
        for i, x_rest in enumerate(loo_exog):
            y_rest, w_rest = next(loo_endog), next(loo_weights)
            g_hat = func(bw,
                         endog=y_rest,
                         exog=-x_rest,
                         data_predict=-self.exog[i, :],
                         W=w_rest)[0]
            squared_error = (self.endog[i] - g_hat) ** 2
            total += squared_error

        # Note: There might be a way to vectorize this. See p.72 in [1]
        return total / self.nobs
Пример #7
0
 def insert_stubs(self, loc, stubs):
     """Insert a column of stubs at column `loc`.  Return None.

     A header row gets an empty cell instead of a stub, so ``len(stubs)``
     should equal the number of non-header rows.

     Parameters
     ----------
     loc : int
         Position handed to ``row.insert`` (header rows) and
         ``row.insert_stub`` (all other rows).
     stubs : iterable
         One stub per non-header row, in table order.  Surplus stubs are
         left unused.

     Raises
     ------
     ValueError
         If `stubs` is exhausted before every non-header row got a stub.
     """
     _Cell = self._Cell
     stubs = iter(stubs)
     for row in self:
         if row.datatype == 'header':
             row.insert(loc, _Cell('', datatype='empty'))
             continue
         # Only the fetch is guarded: errors raised by insert_stub itself
         # must propagate instead of being retried with the next stub.
         try:
             stub = next(stubs)
         except StopIteration:
             raise ValueError('length of stubs must match table length')
         row.insert_stub(loc, stub)
Пример #8
0
 def insert_stubs(self, loc, stubs):
     """Return None.  Insert column of stubs at column `loc`.

     If there is a header row, it gets an empty cell.
     So ``len(stubs)`` should equal the number of non-header rows.

     Parameters
     ----------
     loc : int
         Position passed to ``row.insert`` for header rows and to
         ``row.insert_stub`` for every other row.
     stubs : iterable
         Stubs for the non-header rows, consumed in table order.  Any
         stubs left over after the last row are not used.

     Raises
     ------
     ValueError
         If there are fewer stubs than non-header rows.
     """
     _Cell = self._Cell
     remaining = iter(stubs)
     for row in self:
         if row.datatype == 'header':
             empty_cell = _Cell('', datatype='empty')
             row.insert(loc, empty_cell)
         else:
             # Fetch the stub on its own so that an exception raised by
             # insert_stub is neither retried nor mistaken for running
             # out of stubs.
             try:
                 stub = next(remaining)
             except StopIteration:
                 raise ValueError('length of stubs must match table length')
             row.insert_stub(loc, stub)
Пример #9
0
    def cv_loo(self, params):
        """
        Leave-one-out cross-validation objective, modified to account for
        the linear components.

        Parameters
        ----------
        params : array_like
            Vector consisting of the coefficients (b) and the bandwidths
            (bw).  The first ``k_linear`` elements are the coefficients.

        Returns
        -------
        L : float
            The value of the objective function: the accumulated squared
            differences between ``endog`` and the sum of the linear term
            and the estimate returned by ``self.func``.

        References
        ----------
        See p.254 in [1]
        """
        params = np.asarray(params)
        n_linear = self.k_linear
        coefs = params[0:n_linear]
        bandwidths = params[n_linear:]
        exog_folds = LeaveOneOut(self.exog)
        endog_folds = iter(LeaveOneOut(self.endog))
        nonparam_folds = iter(LeaveOneOut(self.exog_nonparametric))
        linear_fit = np.dot(self.exog, coefs)[:, None]
        objective = 0
        for idx, exog_rest in enumerate(exog_folds):
            endog_rest = next(endog_folds)
            nonparam_rest = next(nonparam_folds)
            linear_rest = np.dot(exog_rest, coefs)[:, None]
            # Remove the linear part before the nonparametric fit.
            partial_resid = endog_rest - linear_rest
            estimate = self.func(
                bandwidths,
                endog=partial_resid,
                exog=-nonparam_rest,
                data_predict=-self.exog_nonparametric[idx, :])
            fitted = estimate[0]
            linear_term = linear_fit[idx, :]
            objective += (self.endog[idx] - linear_term - fitted)**2

        return objective
Пример #10
0
 def _data2rows(self, raw_data):
     """Return list of Row,
     the raw data as rows of cells.
     """
     logging.debug('Enter SimpleTable.data2rows.')
     _Cell = self._Cell
     _Row = self._Row
     rows = []
     for datarow in raw_data:
         dtypes = cycle(self._datatypes)
         newrow = _Row(datarow, datatype='data', table=self, celltype=_Cell)
         for cell in newrow:
             try:
                 cell.datatype = next(dtypes)
             except NameError:  #Python 2.5 or earlier
                 cell.datatype = next(dtypes)
             cell.row = newrow  #a cell knows its row
         rows.append(newrow)
     logging.debug('Exit SimpleTable.data2rows.')
     return rows
Пример #11
0
 def _data2rows(self, raw_data):
     """Return list of Row,
     the raw data as rows of cells.
     """
     logging.debug('Enter SimpleTable.data2rows.')
     _Cell = self._Cell
     _Row = self._Row
     rows = []
     for datarow in raw_data:
         dtypes = cycle(self._datatypes)
         newrow = _Row(datarow, datatype='data', table=self, celltype=_Cell)
         for cell in newrow:
             try:
                 cell.datatype = next(dtypes)
             except NameError: #Python 2.5 or earlier
                 cell.datatype = next(dtypes)
             cell.row = newrow  #a cell knows its row
         rows.append(newrow)
     logging.debug('Exit SimpleTable.data2rows.')
     return rows
Пример #12
0
    def cv_loo(self, params):
        """
        Similar to the cross validation leave-one-out estimator, adjusted
        for the linear components of the model.

        Parameters
        ----------
        params : array_like
            Vector consisting of the coefficients (b) and the bandwidths
            (bw).  The first ``k_linear`` elements are the coefficients.

        Returns
        -------
        L : float
            The value of the objective function.

        References
        ----------
        See p.254 in [1]
        """
        params = np.asarray(params)
        split = self.k_linear
        b, bw = params[0:split], params[split:]
        loo_exog = LeaveOneOut(self.exog)
        loo_endog = iter(LeaveOneOut(self.endog))
        loo_nonparam = iter(LeaveOneOut(self.exog_nonparametric))
        xb_all = np.dot(self.exog, b)[:, None]
        loss = 0
        for i, x_rest in enumerate(loo_exog):
            y_rest = next(loo_endog)
            z_rest = next(loo_nonparam)
            xb_rest = np.dot(x_rest, b)[:, None]
            # Response with the linear part of the other rows removed.
            y_adjusted = y_rest - xb_rest
            z_point = -self.exog_nonparametric[i, :]
            g_hat = self.func(bw, endog=y_adjusted, exog=-z_rest,
                              data_predict=z_point)[0]
            linear_term = xb_all[i, :]
            loss += (self.endog[i] - linear_term - g_hat) ** 2

        return loss
Пример #13
0
    def _data2rows(self, raw_data):
        """Return list of Row,
        the raw data as rows of cells.
        """

        _Cell = self._Cell
        _Row = self._Row
        rows = []
        for datarow in raw_data:
            dtypes = cycle(self._datatypes)
            newrow = _Row(datarow, datatype='data', table=self, celltype=_Cell)
            for cell in newrow:
                cell.datatype = next(dtypes)
                cell.row = newrow  # a cell knows its row
            rows.append(newrow)

        return rows
Пример #14
0
    def _data2rows(self, raw_data):
        """Return list of Row,
        the raw data as rows of cells.
        """

        _Cell = self._Cell
        _Row = self._Row
        rows = []
        for datarow in raw_data:
            dtypes = cycle(self._datatypes)
            newrow = _Row(datarow, datatype='data', table=self, celltype=_Cell)
            for cell in newrow:
                cell.datatype = next(dtypes)
                cell.row = newrow  # a cell knows its row
            rows.append(newrow)

        return rows
Пример #15
0
 def _data2rows(self, raw_data):
     """Return list of Row,
     the raw data as rows of cells.
     """
     logging.debug("Enter SimpleTable.data2rows.")
     _Cell = self._Cell
     _Row = self._Row
     rows = []
     for datarow in raw_data:
         dtypes = cycle(self._datatypes)
         newrow = _Row(datarow, datatype="data", table=self, celltype=_Cell)
         for cell in newrow:
             cell.datatype = next(dtypes)
             cell.row = newrow  # a cell knows its row
         rows.append(newrow)
     logging.debug("Exit SimpleTable.data2rows.")
     return rows
Пример #16
0
    def cv_loo(self, params):
        """
        Leave-one-out cross-validation objective for the stacked
        parameter vector.

        See p. 254 in Textbook.

        Parameters
        ----------
        params : array_like
            The first ``self.K`` elements are the coefficients ``b``; the
            remaining elements are the bandwidths ``bw``.

        Returns
        -------
        L : float
            Sum over observations of the squared difference between
            ``endog`` and the estimate from ``self.func``, divided by
            ``self.nobs``.
        """
        params = np.asarray(params)
        n_coef = self.K
        coefs = params[0:n_coef]
        bandwidths = params[n_coef:]
        exog_folds = LeaveOneOut(self.exog)
        endog_folds = iter(LeaveOneOut(self.endog))
        cv_sum = 0
        for idx, exog_rest in enumerate(exog_folds):
            endog_rest = next(endog_folds)
            # Linear index X.b for the retained rows (as a column) and for
            # the held-out row; both are passed negated.
            index_rest = -np.dot(exog_rest, coefs)[:, None]
            index_point = -np.dot(self.exog[idx:idx + 1, :], coefs)
            estimate = self.func(bandwidths,
                                 endog=endog_rest,
                                 exog=index_rest,
                                 data_predict=index_point)
            cv_sum += (self.endog[idx] - estimate[0]) ** 2

        # Note: There might be a way to vectorize this. See p.72 in [1]
        return cv_sum / self.nobs
Пример #17
0
    def cv_loo(self, params):
        """
        Cross-validation objective computed with leave-one-out estimates.

        See p. 254 in Textbook.

        Parameters
        ----------
        params : array_like
            Coefficients ``b`` (first ``self.K`` elements) followed by the
            bandwidths ``bw``.

        Returns
        -------
        L : float
            Accumulated squared differences between ``endog`` and the
            ``self.func`` estimate, divided by ``self.nobs``.
        """
        params = np.asarray(params)
        b, bw = params[0:self.K], params[self.K:]
        loo_exog = LeaveOneOut(self.exog)
        loo_endog = iter(LeaveOneOut(self.endog))
        loss = 0
        for i, x_rest in enumerate(loo_exog):
            y_rest = next(loo_endog)
            # Both the retained rows and the held-out row enter through
            # their negated linear index X.b.
            xb_rest = np.dot(x_rest, b)[:, None]
            xb_point = np.dot(self.exog[i:i + 1, :], b)
            g_hat = self.func(bw, endog=y_rest, exog=-xb_rest,
                              data_predict=-xb_point)[0]
            loss += (self.endog[i] - g_hat) ** 2

        # Note: There might be a way to vectorize this. See p.72 in [1]
        return loss / self.nobs
Пример #18
0
    def loo_likelihood(self, bw, func=lambda x: x):
        """
        Returns the leave-one-out conditional likelihood of the data.

        If `func` is not equal to the default, what is calculated is a
        function of the leave-one-out conditional likelihood.

        Parameters
        ----------
        bw : array_like
            The bandwidth parameter(s).  The elements from position
            ``self.k_dep`` onwards are used for the ``exog`` density.
        func : callable, optional
            Function to transform the likelihood values (before summing);
            for the log likelihood, use ``func=np.log``.  Default is
            ``f(x) = x``.

        Returns
        -------
        L : float
            The negated sum of ``func`` applied to the leave-one-out
            conditional likelihood values.

        Notes
        -----
        Similar to ``KDE.loo_likelihood``, but substitute
        ``f(y|x)=f(x,y)/f(x)`` for ``f(x)``.
        """
        joint_folds = LeaveOneOut(self.data)
        exog_folds = iter(LeaveOneOut(self.exog))
        total = 0
        for idx, joint_rest in enumerate(joint_folds):
            exog_rest = next(exog_folds)
            joint_density = gpke(
                bw,
                data=-joint_rest,
                data_predict=-self.data[idx, :],
                var_type=(self.dep_type + self.indep_type))
            marginal_density = gpke(
                bw[self.k_dep:],
                data=-exog_rest,
                data_predict=-self.exog[idx, :],
                var_type=self.indep_type)
            # f(y|x) = f(x, y) / f(x)
            conditional = joint_density / marginal_density
            total += func(conditional)

        return -total
Пример #19
0
    def loo_likelihood(self, bw, func=lambda x: x):
        """
        Leave-one-out conditional likelihood of the data.

        With a non-default `func`, the result is a function of the
        leave-one-out conditional likelihood instead.

        Parameters
        ----------
        bw : array_like
            The bandwidth parameter(s).  ``bw[self.k_dep:]`` is the part
            used for the density of ``exog``.
        func : callable, optional
            Function to transform the likelihood values (before summing);
            for the log likelihood, use ``func=np.log``.  Default is
            ``f(x) = x``.

        Returns
        -------
        L : float
            Minus the sum of the transformed leave-one-out likelihood
            values.

        Notes
        -----
        Similar to ``KDE.loo_likelihood``, but substitute
        ``f(y|x)=f(x,y)/f(x)`` for ``f(x)``.
        """
        loo_data = LeaveOneOut(self.data)
        loo_exog = iter(LeaveOneOut(self.exog))
        acc = 0
        for i, data_rest in enumerate(loo_data):
            x_rest = next(loo_exog)
            f_joint = gpke(bw, data=-data_rest,
                           data_predict=-self.data[i, :],
                           var_type=(self.dep_type + self.indep_type))
            f_marginal = gpke(bw[self.k_dep:], data=-x_rest,
                              data_predict=-self.exog[i, :],
                              var_type=self.indep_type)
            # conditional density: joint over marginal
            acc += func(f_joint / f_marginal)

        return -acc