示例#1
0
    def _validate_data(self) -> None:
        """
        Check that the portfolio and factor data can be used for estimation.

        The raw observation counts are compared first.  ``_drop_missing`` is
        then called and every later check uses the arrays as they are after
        that call.

        Raises
        ------
        ValueError
            If the observation counts differ, if ``has_constant`` flags the
            portfolios or the factors, if the factors do not have full
            column rank, or if there are fewer observations than the number
            of factors plus one.
        """
        raw_portfolios = self.portfolios.ndarray
        raw_factors = self.factors.ndarray
        if raw_portfolios.shape[0] != raw_factors.shape[0]:
            msg = ("The number of observations in portfolios and factors "
                   "is not the same.")
            raise ValueError(msg)

        self._drop_missing()

        # Re-read both arrays: they are fetched again after _drop_missing.
        port = self.portfolios.ndarray
        fac = self.factors.ndarray

        if has_constant(port)[0]:
            msg = ("portfolios must not contains a constant or equivalent "
                   "and must not have rank\nless than the dimension of the "
                   "smaller shape.")
            raise ValueError(msg)

        if has_constant(fac)[0]:
            msg = "factors must not contain a constant or equivalent."
            raise ValueError(msg)

        n_factors = fac.shape[1]
        if np.linalg.matrix_rank(fac) < n_factors:
            msg = ("Model cannot be estimated. factors do not have full "
                   "column rank.")
            raise ValueError(msg)

        # At least (number of factors + 1) observations are required.
        if port.shape[0] <= n_factors:
            msg = ("Model cannot be estimated. portfolios must have factors + 1 "
                   "or more returns to\nestimate the model parameters.")
            raise ValueError(msg)
    def s(self):
        """HAC score covariance estimate

        Forms ``xhat = z @ (pinvz @ x)``, multiplies it by ``eps`` to get
        the scores, and passes the scores with kernel weights to
        ``_cov_kernel``.  The kernel name and bandwidth are read from
        ``self.config``.  When the configured bandwidth is None,
        ``_auto_bandwidth`` is set and the bandwidth is obtained from
        ``kernel_optimal_bandwidth`` applied to the row sums of the scores,
        leaving out the column that ``has_constant`` reports as constant.

        The bandwidth that was used is stored in ``self._bandwidth``.

        Returns
        -------
        The kernel covariance of the scores multiplied by ``self._scale``.
        """
        x, z, eps = self.x, self.z, self.eps
        nobs, _ = x.shape

        fitted = z @ (self._pinvz @ x)
        fitted_scores = fitted * eps

        kernel_name = self.config['kernel']
        bandwidth = self.config['bandwidth']
        if bandwidth is None:
            self._auto_bandwidth = True
            # Imported here rather than at module level, as in the original;
            # presumably to avoid a circular import -- TODO confirm.
            from linearmodels.utility import has_constant
            found_const, const_loc = has_constant(fitted)
            # Column selector: one for every column, zero for the constant.
            selector = ones((fitted.shape[1], 1))
            if found_const:
                selector[const_loc] = 0
            bandwidth = kernel_optimal_bandwidth(fitted_scores @ selector,
                                                 kernel_name)

        self._bandwidth = bandwidth
        weights = self._kernels[kernel_name](bandwidth, nobs - 1)

        return self._scale * _cov_kernel(fitted_scores, weights)
示例#3
0
def test_hasconstant():
    """Exercise has_constant on random, constant and implied-constant data."""
    data = np.random.randn(100, 3)

    # Purely random columns: no constant is reported and no location given.
    found, position = has_constant(data)
    assert bool(found) is False
    assert position is None

    # A column fixed at any single value is a constant located at index 0.
    for level in (1, 2):
        data[:, 0] = level
        found, position = has_constant(data)
        assert found is True
        assert position == 0

    # Column 0 is now 0 on even rows and 2 on odd rows, column 1 is 1 on
    # even rows and 0 on odd rows: neither is constant on its own, but
    # 0.5 * col0 + col1 equals one on every row.
    data[::2, 0] = 0
    data[:, 1] = 1
    data[1::2, 1] = 0
    found, position = has_constant(data)
    assert found is True
示例#4
0
    def _validate_data(self):
        """
        Validate the portfolio and factor arrays before estimation.

        The observation counts of the raw arrays must agree.  After
        ``_drop_missing`` has run, the arrays are read again and checked in
        a fixed order; the first failing check raises.

        Raises
        ------
        ValueError
            If the observation counts differ, if ``has_constant`` flags the
            portfolios or the factors, or if either array does not have
            full column rank.
        """
        n_port_obs = self.portfolios.ndarray.shape[0]
        n_factor_obs = self.factors.ndarray.shape[0]
        if n_port_obs != n_factor_obs:
            raise ValueError('The number of observations in portfolios and factors '
                             'is not the same.')
        self._drop_missing()

        port = self.portfolios.ndarray
        fac = self.factors.ndarray

        # Ordered (check, message) pairs.  Each check is evaluated lazily so
        # that later checks never run once an earlier one has failed.
        checks = (
            (lambda: has_constant(port)[0],
             'portfolios must not contains a constant or equivalent.'),
            (lambda: has_constant(fac)[0],
             'factors must not contain a constant or equivalent.'),
            (lambda: np.linalg.matrix_rank(fac) < fac.shape[1],
             'Model cannot be estimated. factors do not have full column rank.'),
            (lambda: np.linalg.matrix_rank(port) < port.shape[1],
             'Model cannot be estimated. portfolios do not have full column rank.'),
        )
        for failed, message in checks:
            if failed():
                raise ValueError(message)
示例#5
0
 def _validate_inputs(self):
     """
     Check that the regressor and instrument arrays permit estimation.

     On success, the result of ``has_constant`` applied to the regressor
     array is stored in ``_has_constant`` and ``_const_loc``.

     Raises
     ------
     ValueError
         If there are no regressors, if there are fewer instruments than
         endogenous regressors, or if either the regressor array or the
         instrument array does not have full column rank.
     """
     regressors, all_instr = self._x, self._z
     if regressors.shape[1] == 0:
         raise ValueError('Model must contain at least one regressor.')

     n_instr = self.instruments.shape[1]
     n_endog = self.endog.shape[1]
     if n_instr < n_endog:
         template = ('The number of instruments ({0}) must be at least as '
                     'large as the number of endogenous regressors ({1}).')
         raise ValueError(template.format(n_instr, n_endog))

     if matrix_rank(regressors) < regressors.shape[1]:
         raise ValueError('regressors [exog endog] do not have full column rank')
     if matrix_rank(all_instr) < all_instr.shape[1]:
         raise ValueError('instruments [exog instruments]  do not have full '
                          'column rank')

     const_info = has_constant(regressors)
     self._has_constant, self._const_loc = const_info
示例#6
0
 def _validate_inputs(self) -> None:
     """
     Verify the model inputs and record whether a constant is present.

     The checks run in order and the first failure raises.  When every
     check passes, ``has_constant`` is applied to the regressor array and
     its two results are stored in ``_has_constant`` and ``_const_loc``.

     Raises
     ------
     ValueError
         If the regressor array has no columns, if the number of
         instruments is smaller than the number of endogenous regressors,
         or if the regressor or instrument array is rank deficient.
     """
     x, z = self._x, self._z
     k_regressors = x.shape[1]
     if k_regressors == 0:
         raise ValueError("Model must contain at least one regressor.")

     counts = (self.instruments.shape[1], self.endog.shape[1])
     if counts[0] < counts[1]:
         message = (
             "The number of instruments ({0}) must be at least as large "
             "as the number of endogenous regressors ({1})."
         ).format(*counts)
         raise ValueError(message)

     if k_regressors > matrix_rank(x):
         raise ValueError(
             "regressors [exog endog] do not have full column rank")
     if z.shape[1] > matrix_rank(z):
         raise ValueError(
             "instruments [exog instruments]  do not have full column rank")

     self._has_constant, self._const_loc = has_constant(x)
示例#7
0
    def _validate_data(self):
        """
        Convert every equation's inputs to IVData and validate the system.

        Each entry of ``self._equations`` must be either a tuple/list ordered
        as (dependent, exog[, weights]) or a dict with keys 'dependent',
        'exog' and optionally 'weights'.  When no weights are supplied, an
        array of ones shaped like the dependent variable is used.

        The method appends to ``_eq_labels``, ``_dependent``, ``_exog``,
        ``_weights``, ``_param_names``, ``_w``, ``_y``, ``_x``, ``_wy`` and
        ``_wx``, and sets ``_common_exog``, ``_has_constant`` and
        ``_constant_loc``.

        Raises
        ------
        TypeError
            If an equation is neither a tuple/list nor a dict.
        ValueError
            If dependent and exogenous data differ in their number of
            observations, if an equation does not have more observations
            than variables, or if an exogenous array is rank deficient.
        """
        # id() of each equation's exog input, used to detect a shared object.
        ids = []
        for i, key in enumerate(self._equations):
            self._eq_labels.append(key)
            eq_data = self._equations[key]
            dep_name = 'dependent_' + str(i)
            exog_name = 'exog_' + str(i)
            if isinstance(eq_data, (tuple, list)):
                self._dependent.append(IVData(eq_data[0], var_name=dep_name))
                ids.append(id(eq_data[1]))
                self._exog.append(IVData(eq_data[1], var_name=exog_name))
                if len(eq_data) == 3:
                    self._weights.append(IVData(eq_data[2]))
                else:
                    dep = self._dependent[-1].ndarray
                    self._weights.append(IVData(ones_like(dep)))

            elif isinstance(eq_data, dict):
                self._dependent.append(IVData(eq_data['dependent'], var_name=dep_name))
                ids.append(id(eq_data['exog']))
                self._exog.append(IVData(eq_data['exog'], var_name=exog_name))
                if 'weights' in eq_data:
                    self._weights.append(IVData(eq_data['weights']))
                else:
                    dep = self._dependent[-1].ndarray
                    self._weights.append(IVData(ones_like(dep)))

            else:
                # Report the type of the offending equation specification,
                # not the type of the builtin ``vars``.
                msg = UNKNOWN_EQ_TYPE.format(key=key, type=type(eq_data))
                raise TypeError(msg)

        for lhs, rhs in zip(self._dependent, self._exog):
            if lhs.ndarray.shape[0] != rhs.ndarray.shape[0]:
                raise ValueError('Dependent and exogenous do not have the same'
                                 ' number of observations')

        self._drop_missing()
        # Exog is treated as common when every equation was given the same
        # exog object.
        self._common_exog = len(set(ids)) == 1
        constant = []
        constant_loc = []
        for lhs, rhs, label in zip(self._dependent, self._exog, self._eq_labels):
            self._param_names.extend([label + '_' + col for col in rhs.cols])
            rhs_a = rhs.ndarray
            lhs_a = lhs.ndarray
            if lhs_a.shape[0] <= rhs_a.shape[1]:
                raise ValueError('Fewer observations than variables')
            if matrix_rank(rhs_a) < rhs_a.shape[1]:
                raise ValueError('Exogenous variable arrays are not all full '
                                 'rank')
            const, const_loc = has_constant(rhs_a)
            constant.append(const)
            constant_loc.append(const_loc)
        self._has_constant = Series(constant,
                                    index=[d.cols[0] for d in self._dependent])
        self._constant_loc = constant_loc

        for dep, exog, w in zip(self._dependent, self._exog, self._weights):
            y = dep.ndarray
            x = exog.ndarray
            # Rescale the weights by their NaN-ignoring mean.
            w = w.ndarray
            w = w / nanmean(w)
            w_sqrt = np.sqrt(w)
            self._w.append(w)
            self._y.append(y)
            self._x.append(x)
            self._wy.append(y * w_sqrt)
            self._wx.append(x * w_sqrt)
示例#8
0
    def _validate_data(self):
        """
        Convert every equation's inputs to IVData and validate the system.

        Each entry of ``self._equations`` must be either a tuple/list ordered
        as (dependent, exog, endog, instruments[, weights]) or a dict with
        keys 'dependent', 'exog' and optionally 'endog', 'instruments' and
        'weights'.  When no weights are supplied, an array of ones shaped
        like the dependent variable is used.

        The method appends to ``_eq_labels``, ``_dependent``, ``_exog``,
        ``_endog``, ``_instr``, ``_weights``, ``_param_names``, ``_w``,
        ``_y``, ``_x``, ``_z``, ``_wy``, ``_wx`` and ``_wz``, and sets
        ``_has_instruments``, ``_common_exog``, ``_has_constant`` and
        ``_constant_loc``.

        Raises
        ------
        TypeError
            If an equation is neither a tuple/list nor a dict.
        ValueError
            If the components of an equation differ in their number of
            observations, if an equation has too few observations for its
            regressors or instruments, if it has more regressors than
            instruments, or if its regressor or instrument array is rank
            deficient.
        """
        # Per-equation identity of the exog (and endog) inputs, used to
        # detect whether all equations share the same regressor objects.
        ids = []
        for i, key in enumerate(self._equations):
            self._eq_labels.append(key)
            eq_data = self._equations[key]
            dep_name = 'dependent_' + str(i)
            exog_name = 'exog_' + str(i)
            endog_name = 'endog_' + str(i)
            instr_name = 'instr_' + str(i)
            if isinstance(eq_data, (tuple, list)):
                dep = IVData(eq_data[0], var_name=dep_name)
                self._dependent.append(dep)
                current_id = id(eq_data[1])
                self._exog.append(IVData(eq_data[1], var_name=exog_name))
                endog = IVData(eq_data[2],
                               var_name=endog_name,
                               nobs=dep.shape[0])
                if endog.shape[1] > 0:
                    current_id = (current_id, id(eq_data[2]))
                ids.append(current_id)
                self._endog.append(endog)

                self._instr.append(
                    IVData(eq_data[3], var_name=instr_name, nobs=dep.shape[0]))
                if len(eq_data) == 5:
                    self._weights.append(IVData(eq_data[4]))
                else:
                    self._weights.append(IVData(ones_like(dep.ndarray)))

            elif isinstance(eq_data, dict):
                dep = IVData(eq_data['dependent'], var_name=dep_name)
                self._dependent.append(dep)
                current_id = id(eq_data['exog'])

                self._exog.append(IVData(eq_data['exog'], var_name=exog_name))
                endog = eq_data.get('endog', None)
                endog = IVData(endog, var_name=endog_name, nobs=dep.shape[0])
                self._endog.append(endog)
                if 'endog' in eq_data:
                    current_id = (current_id, id(eq_data['endog']))
                ids.append(current_id)

                instr = eq_data.get('instruments', None)
                instr = IVData(instr, var_name=instr_name, nobs=dep.shape[0])
                self._instr.append(instr)

                if 'weights' in eq_data:
                    self._weights.append(IVData(eq_data['weights']))
                else:
                    self._weights.append(IVData(ones(dep.shape)))
            else:
                # Report the type of the offending equation specification,
                # not the type of the builtin ``vars``.
                msg = UNKNOWN_EQ_TYPE.format(key=key, type=type(eq_data))
                raise TypeError(msg)

        # NOTE(review): the threshold is "> 1", so an equation with exactly
        # one instrument column does not set the flag -- confirm intended.
        self._has_instruments = any(instr.shape[1] > 1
                                    for instr in self._instr)

        for i, comps in enumerate(
                zip(self._dependent, self._exog, self._endog, self._instr)):
            shapes = [a.shape[0] for a in comps]
            if min(shapes) != max(shapes):
                raise ValueError(
                    'Dependent, exogenous, endogenous and '
                    'instruments do not have the same number of '
                    'observations in eq {eq}'.format(eq=self._eq_labels[i]))

        self._drop_missing()
        self._common_exog = len(set(ids)) == 1
        if self._common_exog:
            # Common exog requires weights are also equal
            w0 = self._weights[0].ndarray
            for w in self._weights:
                self._common_exog = self._common_exog and np.all(
                    w.ndarray == w0)
        constant = []
        constant_loc = []

        for dep, exog, endog, instr, w, label in zip(self._dependent,
                                                     self._exog, self._endog,
                                                     self._instr,
                                                     self._weights,
                                                     self._eq_labels):
            y = dep.ndarray
            # Regressors are [exog endog]; instruments are [exog instr].
            x = np.concatenate([exog.ndarray, endog.ndarray], 1)
            z = np.concatenate([exog.ndarray, instr.ndarray], 1)
            # Rescale the weights by their NaN-ignoring mean.
            w = w.ndarray
            w = w / nanmean(w)
            w_sqrt = np.sqrt(w)
            self._w.append(w)
            self._y.append(y)
            self._x.append(x)
            self._z.append(z)
            self._wy.append(y * w_sqrt)
            self._wx.append(x * w_sqrt)
            self._wz.append(z * w_sqrt)
            cols = list(exog.cols) + list(endog.cols)
            self._param_names.extend([label + '_' + col for col in cols])
            if y.shape[0] <= x.shape[1]:
                raise ValueError('Fewer observations than variables in '
                                 'equation {eq}'.format(eq=label))
            if matrix_rank(x) < x.shape[1]:
                raise ValueError('Equation {eq} regressor array is not full '
                                 'rank'.format(eq=label))
            if x.shape[1] > z.shape[1]:
                raise ValueError('Equation {eq} has fewer instruments than '
                                 'endogenous variables.'.format(eq=label))
            if z.shape[1] > z.shape[0]:
                raise ValueError('Fewer observations than instruments in '
                                 'equation {eq}'.format(eq=label))
            if matrix_rank(z) < z.shape[1]:
                # The condition detects a rank-deficient instrument array,
                # so the message must say "not full rank".
                raise ValueError('Equation {eq} instrument array is not full '
                                 'rank'.format(eq=label))

        for _, rhs, _ in zip(self._y, self._x, self._eq_labels):
            const, const_loc = has_constant(rhs)
            constant.append(const)
            constant_loc.append(const_loc)
        self._has_constant = Series(constant,
                                    index=[d.cols[0] for d in self._dependent])
        self._constant_loc = constant_loc