def test_add_adds_to_track_variable_table(target_nested_dict_track,
                                          nested_new_var_spec):
    """A variable added under id "A" can be looked up on the track, and its
    dump() equals the spec it was added with."""
    target_nested_dict_track.add(nested_new_var_spec, VariableId("A"))

    added: Variable = target_nested_dict_track[VariableId("A")]
    dumped: Dict = added.dump()

    assert dumped == nested_new_var_spec
def test_correct_variable_spec():
    """Track.build exposes each spec entry's fields on the built variable.

    "var1" omits metadata in its spec and is expected to report an empty
    dict; "var2" supplies metadata and is expected to report it unchanged.
    """
    spec: Dict = {
        "var1": {
            "name": "name1",
            "data_type": "Text",
            "sort_order": 1,
        },
        "var2": {
            "name": "name2",
            "data_type": "Integer",
            "sort_order": 0,
            "metadata": {"notes": "notes2"},
        },
    }
    track = Track.build(spec, None, "")

    # Attribute values expected on each built variable, checked in order.
    expected_fields = {
        "var1": {
            "name": "name1",
            "data_type": "Text",
            "sort_order": 1,
            "metadata": {},
        },
        "var2": {
            "name": "name2",
            "data_type": "Integer",
            "sort_order": 0,
            "metadata": {"notes": "notes2"},
        },
    }
    for var_id, fields in expected_fields.items():
        built = track[VariableId(var_id)]
        for attribute, wanted in fields.items():
            assert getattr(built, attribute) == wanted
def test_add_no_sort_order_change(target_nested_dict_track,
                                  nested_new_var_spec):
    """Adding the fixture spec unchanged leaves target_var_2 at sort
    order 0."""
    track = target_nested_dict_track
    track.add(nested_new_var_spec, VariableId("A"))

    existing_var: Variable = track[VariableId("target_var_2")]

    assert existing_var.sort_order == 0
def test_add_with_sources_alters_has_targets_for_source_after(
        target_nested_dict_track, nested_new_var_spec):
    """After adding a variable whose sources list names source_var_3, that
    variable in the source track reports has_targets."""
    source_track: Track = target_nested_dict_track.source
    nested_new_var_spec["sources"] = ["source_var_3"]

    target_nested_dict_track.add(nested_new_var_spec, VariableId("A"))

    assert source_track[VariableId("source_var_3")].has_targets
def test_add_pushes_sort_order_down(target_nested_dict_track,
                                    nested_new_var_spec):
    """A new variable whose sort order is lower than the highest existing
    one pushes the others at that level down.

    Here the new variable is added at sort order 0 and target_var_2 is
    expected to end up at sort order 1.
    """
    track = target_nested_dict_track
    nested_new_var_spec["sort_order"] = 0

    track.add(nested_new_var_spec, VariableId("A"))

    pushed_var: Variable = track[VariableId("target_var_2")]
    assert pushed_var.sort_order == 1
def test_add_with_parent_alters_children_for_parent_after(
        target_nested_dict_track, nested_new_var_spec):
    """After the add, target_folder_2's children are exactly target_var_2
    and the new variable "A".

    Presumably the fixture spec names target_folder_2 as its parent --
    verify against the fixture.
    """
    target_nested_dict_track.add(nested_new_var_spec, VariableId("A"))

    folder: Variable = target_nested_dict_track[VariableId("target_folder_2")]
    child_ids: Set = {child.var_id for child in folder.children}

    assert {"target_var_2", "A"} == child_ids
def test_add_with_sources_alters_source_of_for_source(target_nested_dict_track,
                                                      nested_new_var_spec):
    """After adding "A" with source_var_1 as its source, source_var_1's
    targets() are exactly "A" and target_var_1."""
    source_track: Track = target_nested_dict_track.source
    nested_new_var_spec["sources"] = ["source_var_1"]

    target_nested_dict_track.add(nested_new_var_spec, VariableId("A"))

    source_var: Variable = source_track[VariableId("source_var_1")]
    assert set(source_var.targets()) == {"A", "target_var_1"}
def test_add_alters_descendants_that(target_nested_dict_track,
                                     nested_new_var_spec):
    """After the add, descendants_that(container=-1) includes the new
    variable "A" alongside target_var_1 and target_var_2.

    NOTE(review): container=-1 presumably selects non-container variables
    -- confirm against Track.descendants_that.
    """
    track: Track = target_nested_dict_track
    track.add(nested_new_var_spec, VariableId("A"))

    # The literal is the expectation and the track query is the observed
    # value; the labels were previously the other way round.
    expected: Set[str] = {"A", "target_var_1", "target_var_2"}
    actual: Set[str] = set(track.descendants_that(container=-1))

    assert actual == expected
def test_add_changes_track_list(target_nested_dict_track, nested_new_var_spec):
    """The track's dump() after the add equals its earlier dump plus the
    new spec stored under key "A"."""
    track: Track = target_nested_dict_track

    # Deep-copy the pre-add dump so the expectation is independent of
    # whatever dump() returned.
    expected: Dict = copy.deepcopy(track.dump())
    expected["A"] = nested_new_var_spec

    track.add(nested_new_var_spec, VariableId("A"))

    assert track.dump() == expected
def __init__(self, specs: Dict, source: Optional["Track"], name: str):
    """Build every variable described by ``specs``, then validate them all.

    Do not call directly; use Track.build().

    Args:
        specs: Mapping of variable id to the data handed to
            ``build_variable`` for that id.
        source: Track this one is linked to as a source, or None. When
            truthy, its ``target`` attribute is set to this track.
        name: Track name; stored on the instance and used in log messages.

    Raises:
        ValueError: If ``specs`` contains an empty-string variable id.
        ValidationError: If any variable fails post-load validation; it is
            raised with the full list of (variable, exception) pairs.
    """
    self._variables: Dict["VariableId", "Variable"] = {}
    self.name = name
    self.source = source
    self.target = None
    self.schema: Optional["Schema"] = None
    self._cache: Dict[str, Any] = {}
    # NOTE(review): this is a truthiness test, not an `is not None` test;
    # if Track defines __len__/__bool__, an empty source track would be
    # skipped here -- confirm that is intended.
    if source:
        source.target = self

    # Logging calls pass their arguments lazily so the message is only
    # formatted when the record is actually emitted.
    logging.info("Building variables for track '%s'.", name)
    n: int = 0
    for n, (variable_id, variable_data) in enumerate(specs.items(), start=1):
        if variable_id == '':
            # Invalid var id
            raise ValueError("Variable id must not be an empty string.")
        logging.debug('Building variable "%s".', variable_id)
        variable: "Variable" = self.build_variable(variable_data, variable_id)
        self._variables[VariableId(variable_id)] = variable
        if n % 100 == 0:
            logging.info("Built %i variables.", n)
    logging.info('Finished building all %i variables for track "%s".',
                 n, name)

    logging.info(
        'Performing post-load validation on variables for track "%s".', name)
    n = 0
    validation_errors: ListType[Tuple[Variable, Exception]] = []
    for n, variable in enumerate(self.values(), start=1):
        try:
            Validator.validate(variable, init=True)
        except Exception as ex:
            # Deliberately broad: every failure is collected so that all
            # invalid variables are reported together at the end.
            validation_errors.append((variable, ex))
        if n % 100 == 0:
            if validation_errors:
                logging.info("Validated %i variables (%i invalid).",
                             n, len(validation_errors))
            else:
                logging.info("Validated %i variables.", n)

    if not validation_errors:
        logging.info('All variables valid "%s".', name)
    else:
        logging.error('%i variables valid, %i invalid "%s".',
                      n - len(validation_errors), len(validation_errors),
                      name)
        raise ValidationError(validation_errors)
def __init__(self, specs: Dict, source: Optional["Track"], name: str):
    """Build every variable described by ``specs``, then validate them.

    Do not call directly; use Track.build().

    Args:
        specs: Mapping of variable id to the data handed to
            ``build_variable`` for that id.
        source: Track this one is linked to as a source, or None. When
            truthy, its ``target`` attribute is set to this track.
        name: Track name; stored on the instance and used in log messages.
            Names starting with "nonprofit_origin" currently bypass
            validation (temporary debug logic, see below).

    Raises:
        ValueError: If ``specs`` contains an empty-string variable id.
        Exception: Whatever ``Validator.validate`` raises for the first
            invalid variable propagates unchanged.
    """
    self._variables: Dict["VariableId", "Variable"] = {}
    self.name = name
    self.source = source
    self.target = None
    self.schema: Optional["Schema"] = None
    self._cache: Dict[str, Any] = {}
    # NOTE(review): this is a truthiness test, not an `is not None` test;
    # if Track defines __len__/__bool__, an empty source track would be
    # skipped here -- confirm that is intended.
    if source:
        source.target = self

    # Logging calls pass their arguments lazily so the message is only
    # formatted when the record is actually emitted.
    logging.info("Building variables for track '%s'.", name)
    n: int = 0
    for n, (variable_id, variable_data) in enumerate(specs.items(), start=1):
        if variable_id == '':
            # Invalid var id
            raise ValueError("Variable id must not be an empty string.")
        logging.debug('Building variable "%s".', variable_id)
        variable: "Variable" = self.build_variable(variable_data, variable_id)
        self._variables[VariableId(variable_id)] = variable
        if n % 100 == 0:
            logging.info("Built %i variables.", n)
    logging.info('Finished building all %i variables for track "%s".',
                 n, name)

    # we only validate after the whole thing is built to be able to
    # accurately compute siblings, parents and children
    self.invalidate_variables_cache()
    n = 0
    # HACK: validation bypass keyed on the track name. The warning text
    # itself says this must be removed; it is kept here only to preserve
    # current behaviour.
    if name.startswith("nonprofit_origin"):
        logging.warning("Skipping validation for nonprofit origin. THIS IS DANGEROUS DEBUG LOGIC--REMOVE LATER.")
    else:
        logging.info(
            'Performing post-load validation on variables for track "%s".',
            name)
        for n, variable in enumerate(self.values(), start=1):
            Validator.validate(variable, init=True)
            if n % 100 == 0:
                logging.info("Validated %i variables.", n)
        # Only reported when validation actually ran.
        logging.info('All variables valid "%s".', name)
def new_var_id(self) -> VariableId:
    """Return a generated id for use when no ID is supplied.

    The intended format is <stage name>_<temporal|invariant>_<n+1>, where
    n is the current number of variables. The temporal/invariant segment
    is not produced yet, so the result is <name>_<n+1>.
    """
    # Missing the temporal/immutable part for now
    next_number = len(self._variables) + 1
    return VariableId(f"{self.name}_{next_number}")
def do_source_swap(track: Track) -> Variable:
    """Replace target_var_2's sources with just source_var_3.

    Returns:
        The target_var_2 variable after its sources were reassigned.
    """
    swapped: Variable = track[VariableId("target_var_2")]
    replacement_sources = [VariableId("source_var_3")]
    swapped.sources = replacement_sources
    return swapped
def test_alter_source_changes_sources_list(target_nested_dict_track):
    """Assigning a list to target_var_2.sources is reflected when the
    attribute is read back."""
    wanted = ("source_var_2", "source_var_3")
    var: Variable = target_nested_dict_track[VariableId("target_var_2")]

    # Assign and compare against separate list objects so the check does
    # not depend on object identity.
    var.sources = list(wanted)

    assert var.sources == list(wanted)
def test_add_non_container_parent_raises(target_nested_dict_track,
                                         nested_new_var_spec):
    """Track.add raises ValueError when the spec's parent is target_var_1
    (presumably a non-container variable in the fixture)."""
    track = target_nested_dict_track
    nested_new_var_spec["parent"] = "target_var_1"

    with pytest.raises(ValueError):
        track.add(nested_new_var_spec, VariableId("A"))
def test_alter_source_raises(target_nested_dict_track):
    """Assigning to target_var_2.sources is expected to raise
    AttributeError."""
    var: Variable = target_nested_dict_track[VariableId("target_var_2")]
    replacement = ["source_var_2", "source_var_3"]

    with pytest.raises(AttributeError):
        var.sources = replacement
def test_add_empty_var_id_raises(target_nested_dict_track,
                                 nested_new_var_spec):
    """Track.add raises ValueError when given an empty-string variable
    id."""
    track = target_nested_dict_track

    with pytest.raises(ValueError):
        track.add(nested_new_var_spec, VariableId(""))
def test_add_invalid_source_raises(target_nested_dict_track,
                                   nested_new_var_spec):
    """Track.add raises ValueError when the spec's sources list names
    "non-existent source"."""
    track = target_nested_dict_track
    nested_new_var_spec["sources"] = ["non-existent source"]

    with pytest.raises(ValueError):
        track.add(nested_new_var_spec, VariableId("A"))
def test_add_illegal_name_raises(illegal_name, target_nested_dict_track,
                                 nested_new_var_spec):
    """Track.add raises ValueError when the spec's name is the value
    supplied by the illegal_name fixture."""
    track = target_nested_dict_track
    nested_new_var_spec["name"] = illegal_name

    with pytest.raises(ValueError):
        track.add(nested_new_var_spec, VariableId("A"))
def test_add_locally_non_unique_name_raises(target_nested_dict_track,
                                            nested_new_var_spec):
    """Track.add raises ValueError when the spec's name is "second_target"
    (presumably already taken at that level in the fixture)."""
    track = target_nested_dict_track
    nested_new_var_spec["name"] = "second_target"

    with pytest.raises(ValueError):
        track.add(nested_new_var_spec, VariableId("A"))
def test_add_track_non_unique_id_raises(target_nested_dict_track,
                                        nested_new_var_spec):
    """Track.add raises ValueError when the id passed is target_var_2
    (presumably an id the fixture track already contains)."""
    track = target_nested_dict_track
    duplicate_id = VariableId("target_var_2")

    with pytest.raises(ValueError):
        track.add(nested_new_var_spec, duplicate_id)
def test_add_non_existent_parent_raises(target_nested_dict_track,
                                        nested_new_var_spec):
    """Track.add raises ValueError when the spec's parent is
    "non-existent parent"."""
    track = target_nested_dict_track
    nested_new_var_spec["parent"] = "non-existent parent"

    with pytest.raises(ValueError):
        track.add(nested_new_var_spec, VariableId("A"))
def test_invalid_sort_order_raises(target_nested_dict_track,
                                   nested_new_var_spec):
    """Track.add raises ValueError when the spec's sort_order is 5
    (presumably out of range for the fixture's existing variables)."""
    track = target_nested_dict_track
    nested_new_var_spec["sort_order"] = 5

    with pytest.raises(ValueError):
        track.add(nested_new_var_spec, VariableId("A"))