def assign_bmi_by_year(person: Dict) -> None:
    """Compute a BMI for each yearly record of ``person`` and store it in place.

    The height is read once from ``["immutable", "height"]``. Every other
    top-level key of ``person`` is treated as a year: its ``"weight"`` is
    combined with the height and the result is written to ``[year, "bmi"]``.

    :param person: Nested dict holding an ``"immutable"`` entry plus one
        entry per year. It is modified in place.
    """
    height = nesteddicts.get(person, ["immutable", "height"])
    height_sq = height**2
    # NOTE(review): the 703 factor matches the imperial BMI formula
    # (pounds / inches^2) -- confirm the units of the stored values.
    for year in set(person.keys()) - {"immutable"}:
        weight = nesteddicts.get(person, [year, "weight"])
        nesteddicts.put(person, [year, "bmi"], weight / height_sq * 703)
def __call__(self, content: Dict) -> Iterator[Tuple[Optional[Any], Optional[str]]]:
    """Yield ``(value, identifier)`` for each keyed subject found in ``content``.

    The subjects are read from ``self.subjects_path`` and iterated as a
    mapping of identifier to subject. Nothing is yielded when that lookup
    returns ``None``. Subjects for which the lookup at ``self.arg_path``
    raises ``MissingDataError`` are skipped.

    :param content: The nested dict to read the subjects from.
    """
    keyed_subjects: Dict[str, Dict] = nesteddicts.get(content, self.subjects_path, default=None)
    if keyed_subjects is not None:
        for key, entry in keyed_subjects.items():
            try:
                found: Optional[Any] = nesteddicts.get(entry, self.arg_path)
            except MissingDataError:
                # This subject has nothing at arg_path; move on to the next one.
                continue
            yield found, key
def decode_named_list(self, mappings: Dict[VariableId, str], content: Dict) -> Dict:
    """Re-key every item of a named list of dicts according to ``mappings``.

    For each item in ``content``, the value found at each mapped variable's
    relative path is copied into a flat dict under the string that
    ``mappings`` associates with that variable. Variables whose lookup
    raises ``MissingDataError`` are left out of that item's result.

    :param mappings: A mapping between the variables and their string values.
    :param content: The content in the schema format, keyed by item name.
    :return: A dict with the same keys as ``content``, each holding the
        decoded item.
    :raises ValueError: If a variable ID in ``mappings`` is not in the schema.
    """
    # Resolve every path up front so an unknown ID fails before any decoding.
    relative_paths: Dict[VariableId, List[str]] = {}
    for var_id in mappings:
        variable: Variable = self.schema.get(var_id)
        if variable is None:
            raise ValueError('Unrecognized variable ID "%s"' % var_id)
        relative_paths[var_id] = list(variable.relative_path)

    result: Dict = {}
    for name, item in content.items():
        entry = {}
        for var_id, rel_path in relative_paths.items():
            try:
                found = nesteddicts.get(item, rel_path)
            except MissingDataError:
                continue
            entry[mappings[var_id]] = found
        result[name] = entry
    return result
def _single(self, content: Dict) -> None:
    """Write the formatted form of the source value into ``content``.

    The value at ``self.source_path`` is passed through
    ``self.display_format`` and the result is stored at
    ``self.target_path``. Nothing is written when the source lookup
    returns ``None``.

    :param content: The nested dict to read from and write to, in place.
    """
    source_value: Optional[Any] = nesteddicts.get(content, self.source_path, default=None)
    if source_value is not None:
        formatted: str = self.display_format(source_value)
        nesteddicts.put(content, self.target_path, formatted)
def _list(self, content: Dict) -> None:
    """Apply ``_single`` to each entry of the list at ``self._list_base_path``.

    :param content: The nested dict containing the list; entries are
        modified in place by ``_single``.
    """
    try:
        entries: List[Dict] = nesteddicts.get(content, self._list_base_path)
        for item in entries:
            self._single(item)
    except MissingDataError:
        # The handler spans the whole loop: a missing list and a
        # MissingDataError raised while processing an entry both end
        # the method silently.
        pass
def _handle_explicit_na(self, data_type: str, a_tree: Dict, path: ListType) -> None:
    """Record whether ``path`` is absent from the actual tree, as expected.

    The actual value is looked up with ``POLYTROPOS_CONFIRMED_NA`` as the
    default. Getting that default back is recorded as a match against
    ``POLYTROPOS_NA``; any other value is recorded as a mismatch.

    :param data_type: Data type label forwarded to the recorder methods.
    :param a_tree: The actual-value tree to look in.
    :param path: Path of the value within ``a_tree``.
    :raises ValueError: If the actual value equals ``POLYTROPOS_NA`` itself.
    """
    actual: Optional[Any] = nesteddicts.get(a_tree, path, default=POLYTROPOS_CONFIRMED_NA)
    if actual == POLYTROPOS_NA:
        raise ValueError("Actual value contained ostensibly non-occurring sentinel value %s" % POLYTROPOS_NA)
    if actual == POLYTROPOS_CONFIRMED_NA:
        self._record_match(path, data_type, POLYTROPOS_NA)
        return
    self._record_mismatch(path, data_type, POLYTROPOS_NA, actual)
def _handle_explicit_none(self, data_type: str, a_tree: Dict, path: ListType) -> None:
    """Record whether the actual value at ``path`` is ``None``, as expected.

    The lookup uses ``POLYTROPOS_CONFIRMED_NA`` as its default, so a
    default result is not ``None`` and is recorded as a mismatch.

    :param data_type: Data type label forwarded to the recorder methods.
    :param a_tree: The actual-value tree to look in.
    :param path: Path of the value within ``a_tree``.
    """
    actual: Optional[Any] = nesteddicts.get(a_tree, path, default=POLYTROPOS_CONFIRMED_NA)
    if actual is not None:
        self._record_mismatch(path, data_type, None, actual)
        return
    self._record_match(path, data_type, None)
def _keyed_list(self, content: Dict) -> None:
    """Apply ``_single`` to each value of the mapping at ``self._list_base_path``.

    :param content: The nested dict containing the keyed list; its entries
        are modified in place by ``_single``.
    """
    try:
        keyed_entries: Dict[str, Dict] = nesteddicts.get(content, self._list_base_path)
        for item in keyed_entries.values():
            self._single(item)
    except MissingDataError:
        # The handler spans the whole loop: a missing keyed list and a
        # MissingDataError raised while processing an entry both end
        # the method silently.
        pass
def _inspect_primitive(self, data_type: str, f_val: Optional[Any], a_tree: Dict, path: ListType) -> None:
    """Compare a primitive fixture value with the actual value at ``path``.

    Exactly one outcome is recorded: *missing* when the lookup falls back
    to ``POLYTROPOS_CONFIRMED_NA``, *match* when ``compare_primitives``
    accepts the pair, and *mismatch* otherwise.

    :param data_type: Data type label forwarded to the recorder methods.
    :param f_val: The fixture value; must not be ``POLYTROPOS_NA``.
    :param a_tree: The actual-value tree to look in.
    :param path: Path of the value within ``a_tree``.
    """
    assert f_val != POLYTROPOS_NA  # Should have been handled at _inspect

    actual: Optional[Any] = nesteddicts.get(a_tree, path, default=POLYTROPOS_CONFIRMED_NA)
    if actual == POLYTROPOS_CONFIRMED_NA:
        self._record_missing(path, data_type, f_val)
        return
    if compare_primitives(f_val, actual):
        self._record_match(path, data_type, f_val)
        return
    self._record_mismatch(path, data_type, f_val, actual)
def __call__(self, content: Dict) -> Iterator[Tuple[Optional[Any], Optional[str]]]:
    """Yield ``(value, identifier)`` for each entry of ``self.subjects``.

    ``self.subjects`` maps an identifier to a path into ``content``.
    Identifiers whose lookup raises ``MissingDataError`` are skipped.

    :param content: The nested dict to read the values from.
    """
    for label, path in self.subjects.items():
        try:
            found: Optional[Any] = nesteddicts.get(content, path)
        except MissingDataError:
            # Nothing stored at this subject's path; skip it.
            continue
        yield found, label
def get_all_observations(self, var_id: str) -> Iterator[Tuple[str, Any]]:
    """Yield ``(period, value)`` for each period that has a value for ``var_id``.

    The variable is resolved as a temporal variable via ``self.as_var``.
    Periods for which the lookup raises ``MissingDataError`` are skipped.

    :param var_id: ID of the temporal variable to read.
    """
    variable = self.as_var(var_id, track_type=TrackType.TEMPORAL)
    path_in_period: List = list(variable.absolute_path)
    for period in self.periods:
        lookup_path = [period] + path_in_period
        try:
            yield period, nesteddicts.get(self.content, lookup_path)
        except MissingDataError:
            # No observation for this period.
            pass
def __call__(self, content: Dict) -> Iterator[Tuple[Optional[Any], Optional[str]]]:
    """Yield ``(value, identifier)`` for each subject in a list of subjects.

    The list is read from ``self.subjects_path``; nothing is yielded when
    that lookup returns ``None``. Subjects for which the lookup at
    ``self.arg_path`` raises ``MissingDataError`` are skipped. The
    identifier is ``POLYTROPOS_NA`` unless ``self.identifier_path`` is set,
    in which case it is looked up with ``POLYTROPOS_NA`` as the default.

    :param content: The nested dict to read the subjects from.
    """
    subject_list: ListType[Dict] = nesteddicts.get(content, self.subjects_path, default=None)
    if subject_list is None:
        return

    for entry in subject_list:
        try:
            found: Optional[Any] = nesteddicts.get(entry, self.arg_path)
        except MissingDataError:
            continue

        label: Optional[str]
        if self.identifier_path is None:
            label = POLYTROPOS_NA
        else:
            label = nesteddicts.get(entry, self.identifier_path, default=POLYTROPOS_NA)
        yield found, label
def assign_regression_stats(person: Dict) -> None:
    """Regress weight on year for ``person`` and store the slope and p-value.

    Every top-level key other than ``"immutable"`` is converted to ``int``
    and used as the x value; the matching ``"weight"`` is the y value. The
    slope and p-value from ``scipy.stats.linregress`` are written under
    ``["immutable", "weight_change"]``.

    :param person: Nested dict holding an ``"immutable"`` entry plus one
        entry per year. It is modified in place.
    """
    year_keys = set(person.keys()) - {"immutable"}
    ordered_years = sorted(int(key) for key in year_keys)

    # Weights are looked up under str(year), so each key has to survive
    # the int -> str round trip.
    weights = []
    for year in ordered_years:
        weights.append(nesteddicts.get(person, [str(year), "weight"]))

    slope, _intercept, _r_value, p_value, _std_err = scipy.stats.linregress(ordered_years, weights)
    nesteddicts.put(person, ["immutable", "weight_change", "slope"], slope)
    nesteddicts.put(person, ["immutable", "weight_change", "p_value"], p_value)
def _inspect_complex(self, data_type: str, f_val: Optional[Any], a_tree: Dict, path: ListType) -> None:
    """Compare a complex fixture value with the actual value at ``path``.

    Exactly one outcome is recorded: *missing* when the lookup falls back
    to ``POLYTROPOS_CONFIRMED_NA``, otherwise *match* or *mismatch*
    according to a ``CompareComplexVariable`` built from ``self.schema``.
    Values are passed to the recorders as JSON strings with sorted keys.

    :param data_type: Data type label forwarded to the recorder methods.
    :param f_val: The fixture value; must not be ``POLYTROPOS_NA``.
    :param a_tree: The actual-value tree to look in.
    :param path: Path of the value within ``a_tree``.
    """
    assert f_val != POLYTROPOS_NA  # Should have been handled at _inspect

    actual: Optional[Any] = nesteddicts.get(a_tree, path, default=POLYTROPOS_CONFIRMED_NA)
    if actual == POLYTROPOS_CONFIRMED_NA:
        self._record_missing(path, data_type, json.dumps(f_val, sort_keys=True))
        return

    comparator: CompareComplexVariable = CompareComplexVariable(self.schema)
    if not comparator(f_val, actual, path=path):
        self._record_mismatch(
            path,
            data_type,
            json.dumps(f_val, sort_keys=True),
            json.dumps(actual, sort_keys=True),
        )
        return
    self._record_match(path, data_type, json.dumps(f_val, sort_keys=True))
def get_observation(self, var_id: VariableId, period: str, treat_missing_as_null: bool = False) -> Optional[Any]:
    """Return the value of a temporal variable for one observation period.

    :param var_id: ID of the temporal variable to read.
    :param period: The observation period to read it from.
    :param treat_missing_as_null: When true, return ``None`` instead of
        propagating ``MissingDataError``.
    :raises MissingDataError: If the lookup raises it and
        ``treat_missing_as_null`` is false.
    """
    variable = self.as_var(var_id, track_type=TrackType.TEMPORAL)
    path_in_period: List = list(variable.absolute_path)
    try:
        return nesteddicts.get(self.content, [period] + path_in_period)
    except MissingDataError:
        if not treat_missing_as_null:
            raise
        return None
def get_immutable(self, var_id: VariableId, treat_missing_as_null: bool = False) -> Optional[Any]:
    """Return the value of an immutable variable from this composite.

    The value is read from under the ``"immutable"`` key of ``self.content``.

    :param var_id: ID of the immutable variable to read.
    :param treat_missing_as_null: When true, return ``None`` instead of
        propagating ``MissingDataError``.
    :raises MissingDataError: If the lookup raises it and
        ``treat_missing_as_null`` is false.
    """
    variable = self.as_var(var_id, track_type=TrackType.IMMUTABLE)
    full_path = ["immutable"] + list(variable.absolute_path)
    try:
        return nesteddicts.get(self.content, full_path)
    except MissingDataError:
        if not treat_missing_as_null:
            raise
        return None
def delete_multiple(self, var: Variable, period_content: Dict) -> None:
    """Delete ``var`` from every element of its nearest enclosing list.

    The list container is located at the absolute path of the variable
    named by ``var.nearest_list``. Nothing happens when that lookup raises
    ``MissingDataError`` or returns ``None``. Elements for which the
    deletion raises ``MissingDataError`` are skipped.

    :param var: The variable to delete; its relative path is applied to
        each element.
    :param period_content: The content of one period, modified in place.
    """
    list_root: Variable = self.schema.get(var.nearest_list)
    assert not list_root.descends_from_list, "Nested list handling not implemented"

    try:
        container: Union[Dict, List] = nesteddicts.get(period_content, list_root.absolute_path)
    except MissingDataError:
        return
    if container is None:
        return

    for item in _elements_of(container):  # type: Dict
        try:
            nesteddicts.delete(item, var.relative_path)
        except MissingDataError:
            # This element never had the variable; nothing to delete.
            continue
def _add_one_to_many(self, ret: Dict, tree: Dict, block: Tuple) -> None:
    """Store the processed content of a list-rooted block in ``ret``.

    The root variable of ``block`` is obtained from ``self._get_root_var``
    and its subtree is read from ``tree``. The subtree and the rest of the
    block (``block[1:]``) are handed to ``_handle_list`` or
    ``_handle_keyed_list`` depending on the root's data type, and the
    result is stored under the root's variable ID. ``ret`` is left
    untouched when the subtree lookup raises ``MissingDataError``.

    :param ret: The dict to write the result into.
    :param tree: The tree to read the root variable's subtree from.
    :param block: Tuple whose tail is forwarded to the handler.
    :raises ValueError: If the root variable is neither a ``"List"`` nor a
        ``"KeyedList"``.
    """
    root: GenericList = self._get_root_var(block)
    path_to_root: ListType = list(root.relative_path)
    try:
        subtree: Union[ListType, Dict] = nesteddicts.get(tree, path_to_root)
    except nesteddicts.MissingDataError:
        return

    if root.data_type == "List":
        as_list = cast(ListType, subtree)
        ret[root.var_id] = list(self._handle_list(as_list, block[1:]))
        return
    if root.data_type == "KeyedList":
        as_mapping = cast(Dict, subtree)
        ret[root.var_id] = self._handle_keyed_list(as_mapping, block[1:])
        return
    raise ValueError(
        'Unexpected data type "{}" for putative root variable {}'.format(root.data_type, root.var_id)
    )
def test_get_not_nested(default: Optional[str]):
    """A path that runs through a non-dict value raises ``IncompleteNestingError``.

    The error is expected whatever ``default`` is passed.
    """
    truncated: Dict = {"a": {"b": "I'm supposed to be nested, but I'm not"}}
    path: List[str] = ["a", "b", "c"]
    with raises(nesteddicts.IncompleteNestingError):
        nesteddicts.get(truncated, path, default=default)
def _do_get_test(data: Dict, spec: List[str], expected: Optional[str] = "expected", **kwargs):
    """Assert that ``nesteddicts.get(data, spec, **kwargs)`` equals ``expected``.

    :param data: The nested dict to query.
    :param spec: The path to look up.
    :param expected: The value the lookup must return.
    :param kwargs: Extra keyword arguments forwarded to ``nesteddicts.get``.
    """
    assert nesteddicts.get(data, spec, **kwargs) == expected
def assign_mean_bmi(person: Dict) -> None:
    """Average the yearly BMI values of ``person`` and store the result.

    Every top-level key other than ``"immutable"`` is treated as a year
    whose ``"bmi"`` entry is read. The ``numpy.average`` of those values is
    written to ``["immutable", "mean_bmi"]``.

    :param person: Nested dict holding an ``"immutable"`` entry plus one
        entry per year. It is modified in place.
    """
    year_keys = set(person.keys()) - {"immutable"}
    yearly_bmis = []
    for year in year_keys:
        yearly_bmis.append(nesteddicts.get(person, [year, "bmi"]))
    nesteddicts.put(person, ["immutable", "mean_bmi"], numpy.average(yearly_bmis))
def _one_to_one(self, tree: Dict, var_id: VariableId) -> Optional[Any]:
    """Return the value stored in ``tree`` at the relative path of ``var_id``.

    The variable is resolved through ``self.composite.schema``. The lookup
    is made without a default, so any error raised by ``nesteddicts.get``
    propagates to the caller.

    :param tree: The nested dict to read from.
    :param var_id: ID of the variable whose relative path is used.
    :raises ValueError: If ``var_id`` is not in the schema.
    """
    variable: Variable = self.composite.schema.get(var_id)
    if variable is None:
        raise ValueError('Unknown variable ID "%s"' % var_id)
    rel_path: ListType[str] = list(variable.relative_path)
    return nesteddicts.get(tree, rel_path)
def test_empty_spec_returns_self():
    """Looking up an empty path returns the input dict itself."""
    data: Dict = {"a": {"b": {"c": "expected"}}}
    empty_path: List[str] = []
    assert nesteddicts.get(data, empty_path) == data