def _setup_design(self): """ Create a callable object to evaluate the design matrix at a given set of parameter values to be specified by a recarray and observed Term values, also specified by a recarray. """ d = self.design_expr # Before evaluating, we recreate the formula # with numbered terms, and numbered parameters. # This renaming has no impact on the # final design matrix as the # callable, self._f below, is a lambda # that does not care about the names of the terms. # First, find all terms in the mean expression, # and rename them in the form "__t%d__" with a # random offset. # This may cause a possible problem # when there are parameters named something like "__t%d__". # Using the random offset will minimize the possibility # of this happening. # This renaming is here principally because of the # intercept. random_offset = np.random.random_integers(low=0, high=2**30) terms = getterms(self.mean) newterms = [] for i, t in enumerate(terms): newt = sympy.DeferredVector("__t%d__" % (i + random_offset)) for j, _ in enumerate(d): d[j] = d[j].subs(t, newt) newterms.append(newt) # Next, find all parameters that remain in the design expression. # In a standard regression model, there will be no parameters # because they will all be differentiated away in computing # self.design_expr. In nonlinear models, parameters will remain. params = getparams(self.design_expr) newparams = [] for i, p in enumerate(params): newp = sympy.Symbol("__p%d__" % (i + random_offset), dummy=True) for j, _ in enumerate(d): d[j] = d[j].subs(p, newp) newparams.append(newp) # If there are any aliased functions, these need to be added # to the name space before sympy lambdifies the expression # These "aliased" functions are used for things like # the natural splines, etc. You can represent natural splines # with sympy but the expression is pretty awful. _namespace = {} _add_aliases_to_namespace(_namespace, *d) self._f = sympy.lambdify(newparams + newterms, d, (_namespace, "numpy")) # The input to self.design will be a recarray of that must # have field names that the Formula will expect to see. # However, if any of self.terms are FactorTerms, then the field # in the recarray will not actually be in the Term. # # For example, if there is a Factor 'f' with levels ['a','b'], # there will be terms 'f_a' and 'f_b', though the input to # design will have a field named 'f'. In this sense, # the recarray used in the call to self.design # is not really made up of terms, but "preterms". # In this case, the callable preterm = [] for t in terms: if not is_factor_term(t): preterm.append(str(t)) else: preterm.append(t.factor_name) preterm = list(set(preterm)) # There is also an argument for parameters that are not # Terms. self._dtypes = { 'param': np.dtype([(str(p), np.float) for p in params]), 'term': np.dtype([(str(t), np.float) for t in terms]), 'preterm': np.dtype([(n, np.float) for n in preterm]) } self.__terms = terms
def _getdiff(self): p = list(set(getparams(self.mean))) p.sort() return [s.doit() for s in sympy.diff(self.mean, p)]
def params(self): """ The parameters in the Formula. """ return getparams(self.mean)
def _setup_design(self): """ Create a callable object to evaluate the design matrix at a given set of parameter values to be specified by a recarray and observed Term values, also specified by a recarray. """ d = self.design_expr # Before evaluating, we recreate the formula # with numbered terms, and numbered parameters. # This renaming has no impact on the # final design matrix as the # callable, self._f below, is a lambda # that does not care about the names of the terms. # First, find all terms in the mean expression, # and rename them in the form "__t%d__" with a # random offset. # This may cause a possible problem # when there are parameters named something like "__t%d__". # Using the random offset will minimize the possibility # of this happening. # This renaming is here principally because of the # intercept. random_offset = np.random.random_integers(low=0, high=2**30) terms = getterms(self.mean) newterms = [] for i, t in enumerate(terms): newt = sympy.DeferredVector("__t%d__" % (i + random_offset)) for j, _ in enumerate(d): d[j] = d[j].subs(t, newt) newterms.append(newt) # Next, find all parameters that remain in the design expression. # In a standard regression model, there will be no parameters # because they will all be differentiated away in computing # self.design_expr. In nonlinear models, parameters will remain. params = getparams(self.design_expr) newparams = [] for i, p in enumerate(params): newp = sympy.Symbol("__p%d__" % (i + random_offset), dummy=True) for j, _ in enumerate(d): d[j] = d[j].subs(p, newp) newparams.append(newp) # If there are any aliased functions, these need to be added # to the name space before sympy lambdifies the expression # These "aliased" functions are used for things like # the natural splines, etc. You can represent natural splines # with sympy but the expression is pretty awful. _namespace = {}; _add_aliases_to_namespace(_namespace, *d) self._f = sympy.lambdify(newparams + newterms, d, (_namespace, "numpy")) # The input to self.design will be a recarray of that must # have field names that the Formula will expect to see. # However, if any of self.terms are FactorTerms, then the field # in the recarray will not actually be in the Term. # # For example, if there is a Factor 'f' with levels ['a','b'], # there will be terms 'f_a' and 'f_b', though the input to # design will have a field named 'f'. In this sense, # the recarray used in the call to self.design # is not really made up of terms, but "preterms". # In this case, the callable preterm = [] for t in terms: if not is_factor_term(t): preterm.append(str(t)) else: preterm.append(t.factor_name) preterm = list(set(preterm)) # There is also an argument for parameters that are not # Terms. self._dtypes = {'param':np.dtype([(str(p), np.float) for p in params]), 'term':np.dtype([(str(t), np.float) for t in terms]), 'preterm':np.dtype([(n, np.float) for n in preterm])} self.__terms = terms