def transform(self, collection, transformations):
    """Run every transformation spec against the collection, in order.

    Parameters
    ----------
    collection : BIDSVariableCollection
        The collection handed to each transformation handler as its first
        positional argument.
    transformations : list
        Transformation specs. Each spec is passed through ``convert_JSON``
        and must then provide a 'name' key. An optional 'input' key is
        handed to the handler as its second positional argument; all
        remaining keys are forwarded as keyword arguments.

    Returns
    -------
    The ``collection`` object that was passed in.

    Raises
    ------
    ValueError
        If a transformation name is neither registered in
        ``self.transformations`` nor available as an attribute of
        ``self.default``.
    """
    for spec in transformations:
        # Work on a copy of the key-normalised spec so that popping the
        # reserved keys leaves only the handler's keyword arguments.
        options = dict(convert_JSON(spec))
        name = self._sanitize_name(options.pop('name'))
        inputs = options.pop('input', None)

        # Explicitly registered handlers take precedence; the default
        # module is consulted only when nothing is registered.
        handler = self.transformations.get(name, None)
        if handler is None:
            if hasattr(self.default, name):
                handler = getattr(self.default, name)
            else:
                raise ValueError("No transformation '%s' found: either "
                                 "explicitly register a handler, or pass a"
                                 " default module that supports it." % name)

        handler(collection, inputs, **options)

    return collection
def _load_model(model, validate=True):
    """Load a model specification and return it after key conversion.

    Parameters
    ----------
    model : str or object
        When a string, it is opened as a path to a UTF-8 encoded JSON file
        and parsed. Any other value is used as the specification directly.
    validate : bool
        When truthy, ``validate_model`` is called on the converted
        specification before it is returned.

    Returns
    -------
    The result of ``convert_JSON`` applied to the specification.
    """
    raw = model
    if isinstance(raw, str):
        # A string is treated as a filesystem path, not as inline JSON.
        with open(raw, mode='r', encoding='utf-8') as stream:
            raw = json.load(stream)

    # Convert JSON from CamelCase to snake_case keys
    converted = convert_JSON(raw)
    if not validate:
        return converted

    validate_model(converted)
    return converted
def _load_model(self, model):
    """Store the converted model on ``self`` and build its steps.

    Parameters
    ----------
    model : str or object
        When a string, it is opened as a path to a UTF-8 encoded JSON file
        and parsed. Any other value is used as the specification directly.

    Notes
    -----
    Sets ``self.model`` to the ``convert_JSON`` result and ``self.steps``
    to a list holding one ``Step`` per entry of ``self.model['steps']``.
    Each ``Step`` receives ``self.layout``, its position as ``index``, and
    the entry's items as keyword arguments.
    """
    spec = model
    if isinstance(spec, str):
        # A string is treated as a filesystem path, not as inline JSON.
        with open(spec, mode='r', encoding='utf-8') as stream:
            spec = json.load(stream)

    # Convert JSON from CamelCase to snake_case keys
    self.model = convert_JSON(spec)
    step_specs = self.model['steps']

    self.steps = []
    for position, step_kwargs in enumerate(step_specs):
        self.steps.append(Step(self.layout, index=position, **step_kwargs))
def _load_model(self, model):
    """Load a model specification, store it, and build its steps.

    Parameters
    ----------
    model : str or object
        When a string, it is opened as a path to a JSON file and parsed.
        Any other value is used as the specification directly.

    Notes
    -----
    Sets ``self.model`` to the ``convert_JSON`` result and ``self.steps``
    to a list holding one ``Step`` per entry of ``self.model['steps']``.
    Each entry must provide a 'level' string.
    """
    if isinstance(model, str):
        # Open through a context manager so the handle is closed as soon
        # as parsing finishes, and decode explicitly as UTF-8 instead of
        # relying on the platform's locale encoding.
        with open(model, 'r', encoding='utf-8') as fobj:
            model = json.load(fobj)

    # Convert JSON from CamelCase to snake_case keys
    self.model = convert_JSON(model)
    steps = self.model['steps']
    self.steps = []
    for i, step_args in enumerate(steps):
        # Lower-case the level name before building the Step; this also
        # updates the corresponding entry held in self.model.
        step_args['level'] = step_args['level'].lower()
        step = Step(self.layout, index=i, **step_args)
        self.steps.append(step)
def _load_model(self, model):
    """Store the converted model on ``self`` and build its steps.

    Parameters
    ----------
    model : str or object
        When a string, it is opened as a path to a UTF-8 encoded JSON file
        and parsed. Any other value is used as the specification directly.

    Notes
    -----
    Sets ``self.model`` to the ``convert_JSON`` result and ``self.steps``
    to a list holding one ``Step`` per entry of ``self.model['steps']``.
    Each entry's 'level' value is lower-cased in place before the entry's
    items are passed to ``Step`` as keyword arguments.
    """
    spec = model
    if isinstance(spec, str):
        # A string is treated as a filesystem path, not as inline JSON.
        with open(spec, mode='r', encoding='utf-8') as stream:
            spec = json.load(stream)

    # Convert JSON from CamelCase to snake_case keys
    self.model = convert_JSON(spec)
    step_specs = self.model['steps']

    self.steps = []
    for position, step_kwargs in enumerate(step_specs):
        step_kwargs['level'] = step_kwargs['level'].lower()
        self.steps.append(Step(self.layout, index=position, **step_kwargs))
def from_df(cls, df, model, metadata=None, formula=None):
    """Build a GLMMSpec from a predictor DataFrame and a model spec.

    Parameters
    ----------
    df : DataFrame
        A pandas DataFrame containing predictor information (i.e., the
        fixed component of the design matrix).
    model : dict
        The "Model" section from a BIDS-StatsModel specification. It is
        passed through ``convert_JSON`` before its 'formula',
        'variance_components' and 'error' entries are read.
    metadata : DataFrame
        Optional DataFrame with additional columns that are not part of
        the design matrix but may be needed to define random effects. It
        is concatenated column-wise with the fixed-effects frame, so rows
        must map 1-to-1 with those in `df`.
    formula : str
        Optional Wilkinson (R-style) formula specifying the fixed (X) part
        of the design matrix. Only the right-hand side should be passed
        (i.e., "X1 * X2", not "y ~ X1 * X2"). If provided, it takes
        precedence over any formula found in `model`.

    Returns
    -------
    A GLMMSpec instance.
    """
    model = convert_JSON(model)
    spec_args = {}

    # Fixed terms: expand through the formula when one is available,
    # otherwise use the DataFrame as given.
    formula = formula or model.get('formula')
    design = df if formula is None else model_matrix(formula, df)
    spec_args['X'] = design

    # Variance components
    components = model.get('variance_components', [])
    if components:
        # VCs can be defined by variables in either the fixed predictor
        # DF or the supplementary metadata DF, so concatenate them.
        frames = [design] if metadata is None else [design, metadata]
        pool = pd.concat(frames, axis=1)

        blocks = []
        for comp in components:
            # Levels come either from the categories of one variable
            # ('levels_from') or from an explicit set of columns ('levels').
            if 'levels_from' in comp:
                values = pool[comp['levels_from']].values
                blocks.append(pd.get_dummies(values).values)
            else:
                blocks.append(pool.loc[:, comp['levels']].values)

        Z = np.concatenate(blocks, axis=1)

        # Indicator matrix: one row per column of Z, one column per
        # variance component, with 1 marking membership.
        membership = np.zeros((Z.shape[1], len(blocks)))
        start = 0
        for col, block in enumerate(blocks):
            stop = start + block.shape[1]
            membership[start:stop, col] = 1
            start = stop
        groups = pd.DataFrame(
            membership, columns=[comp['name'] for comp in components])

        spec_args['Z'] = Z
        spec_args['groups'] = groups

    error = model.get('error')
    if error:
        spec_args['family'] = error.get('family')
        spec_args['link'] = error.get('link')

    return GLMMSpec(**spec_args)
def from_collection(cls, collection, model):
    """Build a GLMMSpec from a variable collection and a model spec.

    Parameters
    ----------
    collection : BIDSVariableCollection
        A BIDSVariableCollection containing variable information. A
        BIDSRunVariableCollection is accepted only when all of its
        variables are dense.
    model : dict
        The "Model" section from a BIDS-StatsModel specification. It is
        passed through ``convert_JSON`` before its 'x',
        'variance_components' and 'error' entries are read.

    Returns
    -------
    A GLMMSpec instance.

    Raises
    ------
    ValueError
        If `collection` is a BIDSRunVariableCollection for which
        ``all_dense()`` is false.
    """
    is_run_level = isinstance(collection, BIDSRunVariableCollection)
    if is_run_level and not collection.all_dense():
        raise ValueError(
            "Input BIDSRunVariableCollection contains at "
            "least one sparse variable. All variables must"
            " be dense!")

    model = convert_JSON(model)
    spec_args = {}

    # Fixed terms: X is only provided when the model lists predictors.
    predictors = model.get('x', [])
    if predictors:
        predictors = collection.match_variables(predictors)
        spec_args['X'] = collection.to_df(predictors).loc[:, predictors]

    # Variance components
    components = model.get('variance_components', [])
    if components:
        blocks = []
        for comp in components:
            # Levels come either from the categories of one variable
            # ('levels_from') or from an explicit set of variables
            # ('levels').
            if 'levels_from' in comp:
                values = collection.variables[comp['levels_from']].values
                blocks.append(pd.get_dummies(values).values)
            else:
                level_names = collection.match_variables(comp['levels'])
                level_frame = collection.to_df(level_names)
                blocks.append(level_frame.loc[:, level_names].values)

        Z = np.concatenate(blocks, axis=1)

        # Indicator matrix: one row per column of Z, one column per
        # variance component, with 1 marking membership.
        membership = np.zeros((Z.shape[1], len(blocks)))
        start = 0
        for col, block in enumerate(blocks):
            stop = start + block.shape[1]
            membership[start:stop, col] = 1
            start = stop
        groups = pd.DataFrame(
            membership, columns=[comp['name'] for comp in components])

        spec_args['Z'] = Z
        spec_args['groups'] = groups

    error = model.get('error')
    if error:
        spec_args['family'] = error.get('family')
        spec_args['link'] = error.get('link')

    return GLMMSpec(**spec_args)