def test_re_identification_risk_to_dataframe_shape(self):
    """The re-identification dataframe exposes the raw response's measure.

    Compares the ``records_affected_by_highest_prosecutor_risk`` entry of
    the raw response against row 0 of the same-named dataframe column.
    """
    frame = RiskProfile(
        self.risk_profile_response).re_identification_risk_dataframe()
    measures = self.risk_profile_response["reIdentificationRisk"]["measures"]
    expected_value = measures["records_affected_by_highest_prosecutor_risk"]
    actual_value = frame["records_affected_by_highest_prosecutor_risk"][0]
    self.assertEqual(expected_value, actual_value)
def test_equality(self):
    """Profiles built from the same response are equal until one is changed."""
    first = RiskProfile(self.risk_profile_response)
    second = RiskProfile(self.risk_profile_response)
    self.assertEqual(first, second)

    # Change one measure on the second profile only; the two profiles are
    # then expected to compare as different.
    second._re_identification_of_risk["estimated_prosecutor_risk"] = 50.0
    self.assertNotEqual(first, second)
def test_attacker_success_rate_property(self):
    """``attacker_success_rate`` returns the three attacker-model rates."""
    profile = RiskProfile(self.risk_profile_response)
    expected_rates = {
        'Prosecutor_attacker_success_rate': 1.0,
        'Marketer_attacker_success_rate': 1.0,
        'Journalist_attacker_success_rate': 1.0,
    }
    self.assertEqual(expected_rates, profile.attacker_success_rate)
def risk_profile(self, dataset: Dataset) -> RiskProfile:
    """
    Creates a risk profile for a provided Dataset

    The payload is built with ``_risk_profile_payload``, submitted through
    ``_risk_profile``, and the JSON text of the response is decoded and
    wrapped in a RiskProfile.

    RiskProfile contains:
     - re-identification risks
     - distributed risk

    :param dataset: Dataset to create a risk profile for
    :return: RiskProfile
    """
    payload = self._risk_profile_payload(dataset)
    raw_response = self._risk_profile(payload)
    return RiskProfile(json.loads(raw_response.text))
def _anonymize_result(self, response):
    """
    Creates the result to be delivered back to the caller

    Decodes the JSON text of the response and assembles an AnonymizeResult
    from its ``anonymizeResult`` and ``riskProfile`` sections.

    :param response: object whose ``text`` attribute holds the JSON document
        (presumably the HTTP response from the service -- confirm with caller)
    :return: the value produced by ``AnonymizeResult._from_response``
    """
    payload = json.loads(response.text)
    attribute_mapping = self._attributes(payload)

    # Everything except the risk profile lives under "anonymizeResult".
    anonymize_section = payload["anonymizeResult"]
    anonymized_dataset = Dataset(anonymize_section["data"], attribute_mapping)
    profile = RiskProfile(payload["riskProfile"])
    status = anonymize_section["anonymizationStatus"]
    metrics = anonymize_section["metrics"]

    return AnonymizeResult._from_response(anonymized_dataset, profile,
                                          metrics, status)
def setUp(self):
    """Prepare the fixtures used by the tests.

    Sets up a small raw table with its attribute-type mapping and Dataset,
    a canned risk-profile response, the RiskProfile built from it, and an
    AnonymizeResult combining them.
    """
    self.test_data = [['id', 'name'], ['0', 'Viktor'], ['1', 'Jerry']]
    self.test_attribute_type_mapping = {
        'id': AttributeType.IDENTIFYING,
        'name': AttributeType.QUASIIDENTIFYING,
    }
    self.test_dataset = Dataset(self.test_data,
                                self.test_attribute_type_mapping)

    measures = {
        "measure_value": "[%]",
        "Prosecutor_attacker_success_rate": "98.72",
        "records_affected_by_highest_prosecutor_risk": "97.46000000000001",
        "sample_uniques": "97.46000000000001",
        "estimated_prosecutor_risk": "100.0",
        "population_model": "PITMAN",
        "highest_journalist_risk": "100.0",
        "records_affected_by_lowest_risk": "0.06",
        "estimated_marketer_risk": "98.72000000000001",
        "Journalist_attacker_success_rate": "98.72",
        "highest_prosecutor_risk": "100.0",
        "estimated_journalist_risk": "100.0",
        "lowest_risk": "33.33333333333333",
        "Marketer_attacker_success_rate": "98.72",
        "average_prosecutor_risk": "98.72000000000001",
        "records_affected_by_highest_journalist_risk": "97.46000000000001",
        "population_uniques": "39.64593493418713",
        "quasi_identifiers": [
            "Innvandrerbakgrunn",
            "Ytelse",
            "Innsatsgruppe",
            "Ledighetsstatus",
        ],
    }

    # Only the three highest-risk intervals hold records; each row is
    # (interval, share within interval, share with maximal risk).
    populated_rows = [
        ("]50,100]", 0.9746, 1.0),
        ("]33.4,50]", 0.0248, 0.0254),
        ("]25,33.4]", 0.0006, 0.0006),
    ]
    # The remaining intervals are all empty (0.0 / 0.0).
    empty_interval_labels = [
        "]20,25]",
        "]16.7,20]",
        "]14.3,16.7]",
        "]12.5,14.3]",
        "]10,12.5]",
        "]9,10]",
        "]8,9]",
        "]7,8]",
        "]6,7]",
        "]5,6]",
        "]4,5]",
        "]3,4]",
        "]2,3]",
        "]1,2]",
        "]0.1,1]",
        "]0.01,0.1]",
        "]0.001,0.01]",
        "]0.0001,0.001]",
        "]1e-5,0.0001]",
        "]1e-6,1e-5]",
        "]0,1e-6]",
    ]
    interval_rows = populated_rows + [
        (label, 0.0, 0.0) for label in empty_interval_labels
    ]
    # Key spellings ("Inteval", "Maxmal") are kept exactly as the fixture
    # defines them.
    risk_interval_list = [
        {
            "interval": label,
            "recordsWithRiskWithinInteval": within_interval,
            "recordsWithMaxmalRiskWithinInterval": maximal_within_interval,
        }
        for label, within_interval, maximal_within_interval in interval_rows
    ]

    self.risk_profile_response = {
        "reIdentificationRisk": {"measures": measures},
        "distributionOfRisk": {"riskIntervalList": risk_interval_list},
    }
    self.test_riskprofile = RiskProfile(self.risk_profile_response)
    # NOTE(review): self.test_metrics is not assigned in this method;
    # presumably it is defined elsewhere on the test class -- confirm.
    self.test_anonymize_result = AnonymizeResult(self.test_data,
                                                 self.test_riskprofile,
                                                 self.test_metrics)
def risk_profile() -> RiskProfile:
    """Return a RiskProfile wrapping the value returned by ``analyze_response()``."""
    # NOTE(review): looks like a test fixture/helper; any decorator is not
    # visible here.
    return RiskProfile(analyze_response())
def test_population_model_property(self):
    """``population_model`` reports the model named in the response."""
    profile = RiskProfile(self.risk_profile_response)
    self.assertEqual("ZAYATZ", profile.population_model)
def test_quasi_indentifers_property(self):
    """``quasi_identifiers`` returns the expected quasi-identifier list."""
    profile = RiskProfile(self.risk_profile_response)
    expected_identifiers = ['zipcode']
    self.assertEqual(expected_identifiers, profile.quasi_identifiers)
def test_re_identification_risk_to_dataframe__column_types(self):
    """Every column of the re-identification dataframe is ``float64``."""
    frame = RiskProfile(
        self.risk_profile_response).re_identification_risk_dataframe()
    column_types = frame.dtypes.tolist()
    for column_type in column_types:
        self.assertEqual(column_type, dtype("float64"))
def test_distribution_of_risk_to_dataframe_shape(self):
    """The distribution dataframe's first interval matches the response.

    Compares the ``interval`` of the first entry in the response's
    ``riskIntervalList`` against row 0 of the dataframe's ``interval``
    column.
    """
    frame = RiskProfile(
        self.risk_profile_response).distribution_of_risk_dataframe()
    interval_list = (
        self.risk_profile_response["distributionOfRisk"]["riskIntervalList"])
    expected_interval = interval_list[0]["interval"]
    self.assertEqual(expected_interval, frame["interval"][0])
def test_to_dataframe(self):
    """``re_identification_risk_dataframe`` returns a ``DataFrame``."""
    profile = RiskProfile(self.risk_profile_response)
    self.assertIsInstance(profile.re_identification_risk_dataframe(),
                          DataFrame)
def test_hash(self):
    """Two profiles built from the same response collapse to one set entry."""
    first = RiskProfile(self.risk_profile_response)
    second = RiskProfile(self.risk_profile_response)
    # A set keeps a single element for members that hash and compare equal.
    distinct_profiles = {first, second}
    self.assertEqual(1, len(distinct_profiles))
def test_init(self):
    """Constructing a RiskProfile from the fixture response must not raise."""
    # Smoke test: the constructed object is intentionally discarded.
    _ = RiskProfile(self.risk_profile_response)