def test_record_fpe_mask():
    """Encrypt and restore ``credit_card`` using an FPE config with a ``StringMask(start_pos=1)``.

    Two scenarios are exercised against the same record: one that first
    removes whitespace with a ``FormatConfig`` and one that encrypts the raw,
    space-separated value. Each scenario asserts the transformed value and
    then the value produced by the restore pipeline.
    """
    secret = "2B7E151628AED2A6ABF7158809CF4F3CEF4359D8D580AA4F7F036D6F04FC6A94"
    source = {
        'latitude': -70.783,
        'longitude': -112.221,
        'credit_card': '4123 5678 9123 4567',
        'the_dude': 100000000,
        'the_hotness': "convertme",
        "the_sci_notation": 1.23E-7,
    }
    mask = StringMask(start_pos=1)

    def check_round_trip(xforms, expected_encrypted, expected_restored):
        # Transform pipeline and restore pipeline share the same data paths.
        paths = [DataPath(input='credit_card', xforms=xforms)]
        forward = DataTransformPipeline(paths)
        backward = DataRestorePipeline(paths)
        encrypted = forward.transform_record(source)
        assert encrypted.get('credit_card') == expected_encrypted
        restored = backward.transform_record(encrypted)
        assert restored.get('credit_card') == expected_restored

    # Scenario 1: whitespace is removed before the FPE step.
    check_round_trip(
        [
            FormatConfig(pattern=r'\s+', replacement=''),
            FpeStringConfig(secret=secret, radix=10, mask=[mask]),
        ],
        '4599631908097107',
        '4123567891234567',
    )

    # Scenario 2: FPE only, applied to the value with its spaces intact.
    check_round_trip(
        [FpeStringConfig(secret=secret, radix=10, mask=[mask])],
        '4599 6319 0809 7107',
        '4123 5678 9123 4567',
    )
def test_fpe_string():
    """Check radix-62 FPE on ``"John Doe"`` under three mask combinations.

    For each combination the transformer is built through ``factory``, the
    transformed field is compared with the expected ciphertext, and
    ``_restore_field`` must return the original name.
    """
    secret = "2B7E151628AED2A6ABF7158809CF4F3CEF4359D8D580AA4F7F036D6F04FC6A94"
    mask_last_name = StringMask(mask_after=' ')
    mask_first_name = StringMask(mask_until=' ')

    # (masks handed to the config, expected transformed value)
    scenarios = [
        ([mask_last_name], 'John BDy'),
        ([mask_first_name], 'Uugx Doe'),
        ([mask_first_name, mask_last_name], 'Uugx BDy'),
    ]

    for masks, expected in scenarios:
        config = FpeStringConfig(secret=secret, radix=62, mask=masks)
        transformer = factory(config)
        encrypted = transformer.transform_field("person_name", "John Doe", None)
        assert encrypted == {'person_name': expected}
        restored = transformer._restore_field(
            'person_name', encrypted['person_name'], None)
        assert restored == {'person_name': 'John Doe'}
def test_record_fpe_base62():
    """Encrypt every field of a mixed record and restore it.

    Numeric strings and integers use radix-10 string FPE, floats use
    ``FpeFloatConfig`` with ``float_precision=3``, and the free text uses
    radix 62. The encrypted credit card is pinned to a known value and the
    restored record must equal the input.
    """
    secret = "2B7E151628AED2A6ABF7158809CF4F3CEF4359D8D580AA4F7F036D6F04FC6A94"
    original = {
        'latitude': -70.783,
        'longitude': -112.221,
        'credit_card': '4123567891234567',
        'the_dude': 100000000,
        'the_hotness': "This is some awesome text with UPPER and lower case characters.",
        "the_sci_notation": 1.23E-7,
    }

    digits_fpe = [FpeStringConfig(secret=secret, radix=10)]
    float_fpe = [FpeFloatConfig(secret=secret, radix=10, float_precision=3)]
    card_fpe = [
        FormatConfig(pattern=r'\s+', replacement=''),
        FpeStringConfig(secret=secret, radix=10),
    ]
    text_fpe = [FpeStringConfig(secret=secret, radix=62)]

    # Field name -> transform list, in the order the paths are registered.
    field_plan = [
        ('credit_card', card_fpe),
        ('longitude', float_fpe),
        ('latitude', float_fpe),
        ('the_dude', digits_fpe),
        ('the_sci_notation', float_fpe),
        ('the_hotness', text_fpe),
    ]
    data_paths = [
        DataPath(input=name, xforms=xforms) for name, xforms in field_plan
    ]

    forward = DataTransformPipeline(data_paths)
    backward = DataRestorePipeline(data_paths)

    encrypted = forward.transform_record(original)
    assert encrypted.get('credit_card') == '5931468769662449'

    restored = backward.transform_record(encrypted)
    assert restored == original
def test_pipe_record_filter(record_meta_data_check):
    """Apply label-driven transforms through three field patterns and check the output.

    The data paths use the inputs ``'Country'``, ``'?ddress'`` and ``'Cr*'``.
    The test asserts the transformed credit card (in both record and
    metadata), the span of the first Country label, the number of fields kept
    in metadata and record, and that restoring yields the original credit
    card.
    """
    secret = "2B7E151628AED2A6ABF7158809CF4F3CEF4359D8D580AA4F7F036D6F04FC6A94"
    entity_xf = [
        RedactWithLabelConfig(labels=['date']),
        SecureHashConfig(secret='rockybalboa', labels=['location']),
        FpeStringConfig(labels=['credit_card_number'], secret=secret, radix=10),
    ]
    data_paths = [
        DataPath(input=pattern, xforms=entity_xf)
        for pattern in ('Country', '?ddress', 'Cr*')
    ]
    forward = DataTransformPipeline(data_paths)
    backward = DataRestorePipeline(data_paths)

    transformed = forward.transform_record(record_meta_data_check)
    assert transformed['record']['Credit Card'] == '4471585942734458'

    card_label = transformed['metadata']['fields']['Credit Card']['ner']['labels'][0]
    assert card_label['text'] == '4471585942734458'
    assert transformed['metadata']['fields']['Country']['ner']['labels'][0]['start'] == 0
    assert transformed['metadata']['fields']['Country']['ner']['labels'][0]['end'] == 64

    # The metadata has one entry less than record entries, because Address
    # does not have meta data in this test.
    assert len(transformed['metadata']['fields']) == 2
    assert len(transformed['record']) == 3

    restored = backward.transform_record(transformed)
    expected_card = record_meta_data_check['record']['Credit Card']
    assert restored['record']['Credit Card'] == expected_card
def test_gretel_meta(record_and_meta_2):
    """Check that ``metadata['gretel_id']`` is unchanged by transform and restore.

    ``latitude`` gets an unconditional FPE config, every other field gets an
    FPE config restricted to the ``ip_address`` label. After restoring, the
    record must equal the fixture's record.
    """
    secret = "2B7E151628AED2A6ABF7158809CF4F3CEF4359D8D580AA4F7F036D6F04FC6A94"
    expected_id = '2732c7ed44a8402f899a01e52a931985'

    plain_fpe = FpeStringConfig(secret=secret, radix=10)
    ip_label_fpe = FpeStringConfig(labels=['ip_address'], secret=secret, radix=10)
    data_paths = [
        DataPath(input='latitude', xforms=plain_fpe),
        DataPath(input='*', xforms=ip_label_fpe),
    ]
    forward = DataTransformPipeline(data_paths)
    backward = DataRestorePipeline(data_paths)

    transformed = forward.transform_record(record_and_meta_2)
    assert transformed['metadata']['gretel_id'] == expected_id

    restored = backward.transform_record(transformed)
    assert restored['record'] == record_and_meta_2['record']
    assert restored['metadata']['gretel_id'] == expected_id
def test_record_fpe_precision():
    """Pin FPE outputs for float configs with different ``float_precision`` values.

    Floats are encrypted with precision 1, 0 and 1 respectively, integers and
    the card number with radix-10 string FPE, and the text with radix 36.
    Each transformed value is compared with a fixed expectation and the
    restored record must equal the input.
    """
    secret = "2B7E151628AED2A6ABF7158809CF4F3CEF4359D8D580AA4F7F036D6F04FC6A94"
    original = {
        'latitude': -70.783,
        'longitude': -112.221,
        'credit_card': '4123567891234567',
        'the_dude': 100000000,
        'the_hotness': "convertme",
        "the_sci_notation": 1.23E-7,
    }

    int_fpe = FpeStringConfig(secret=secret, radix=10)
    latitude_fpe = FpeFloatConfig(secret=secret, radix=10, float_precision=1)
    longitude_fpe = FpeFloatConfig(secret=secret, radix=10, float_precision=0)
    sci_fpe = FpeFloatConfig(secret=secret, radix=10, float_precision=1)
    text_fpe = FpeStringConfig(secret=secret, radix=36)

    data_paths = [
        DataPath(input='credit_card', xforms=int_fpe),
        DataPath(input='latitude', xforms=latitude_fpe),
        DataPath(input='the_dude', xforms=int_fpe),
        DataPath(input='longitude', xforms=longitude_fpe),
        DataPath(input='the_sci_notation', xforms=sci_fpe),
        DataPath(input='the_hotness', xforms=text_fpe),
        DataPath(input='*'),
    ]
    forward = DataTransformPipeline(data_paths)
    backward = DataRestorePipeline(data_paths)
    encrypted = forward.transform_record(original)

    # Checked in this exact order; values are the known outputs for the secret.
    expectations = [
        ('credit_card', '5931468769662449'),
        ('longitude', -112.2929577756414),
        ('latitude', -70.78143312456855),
        ('the_hotness', '2qjuxg7ju'),
        ('the_dude', 128994144),
        ('the_sci_notation', 1.2342967235924508e-07),
    ]
    for field_name, expected in expectations:
        assert encrypted.get(field_name) == expected

    restored = backward.transform_record(encrypted)
    assert restored == original
def build_anonymizing_transforms(self):
    """Append one ``DataPath`` per entity-tagged field to ``self.data_paths``.

    For every entity in ``self.id_entities`` the project is asked for the
    fields tagged with that entity. Each field gets a transform list chosen
    by ``random.randint(1, 6)``: drop, fake, encrypt, character-redact,
    label-redact or string-redact. The choice is printed before the path is
    appended.
    """
    for entity in self.id_entities:
        # Names of all project fields tagged as this entity type
        tagged_fields = [
            detail["field"]
            for detail in self.project.get_field_details(entity=entity)
        ]

        for field in tagged_fields:
            roll = random.randint(1, 6)

            if roll == 1:
                print(f"Dropping field {field}")
                transforms = [DropConfig()]
            elif roll == 2:
                print(f"Faking field {field}")
                transforms = [
                    FakeConstantConfig(
                        seed=SEED,
                        fake_method=FAKER_MAP.get(entity, "name"),
                    )
                ]
            elif roll == 3:
                print(f"Encrypting field {field}")
                # radix 62 will encrypt alphanumeric but no special characters
                transforms = [FpeStringConfig(secret=SECRET, radix=62)]
            elif roll == 4:
                print(f"Character redacting field {field}")
                if entity != "email_address":
                    transforms = [
                        RedactWithCharConfig(
                            "#", mask=[StringMask(start_pos=3)])
                    ]
                else:
                    # Use a fancier mask for emails
                    transforms = [
                        RedactWithCharConfig(
                            char="X",
                            mask=[
                                StringMask(start_pos=3, mask_until="@"),
                                StringMask(mask_after="@",
                                           mask_until=".",
                                           greedy=True),
                            ],
                        )
                    ]
            elif roll == 5:
                print(f"Label redacting field {field}")
                transforms = [RedactWithLabelConfig(labels=[entity])]
            elif roll == 6:
                print(f"String redacting field {field}")
                transforms = [RedactWithStringConfig(string="CLASSIFIED")]
            else:
                # No branch matched: register the field with no transforms.
                transforms = []

            self.data_paths.append(DataPath(input=field, xforms=transforms))
def test_record_zero_fpe():
    """Pin FPE outputs for a record whose coordinates are ``0.0`` and ``-0.0``.

    Floats use ``FpeFloatConfig`` with ``float_precision=3``; the card number
    and integer use radix-10 string FPE; the text uses radix 36. Every
    transformed value is compared with a fixed expectation and the restored
    record must compare equal to the input.
    """
    secret = "2B7E151628AED2A6ABF7158809CF4F3CEF4359D8D580AA4F7F036D6F04FC6A94"
    original = {
        'latitude': 0.0,
        'longitude': -0.0,
        'credit_card': '4123567891234567',
        'the_dude': 100000000,
        'the_hotness': "convertme",
        "the_sci_notation": 1.23E-7,
    }

    digits_fpe = [FpeStringConfig(secret=secret, radix=10)]
    float_fpe = [FpeFloatConfig(secret=secret, radix=10, float_precision=3)]
    text_fpe = [FpeStringConfig(secret=secret, radix=36)]

    field_plan = [
        ('credit_card', digits_fpe),
        ('latitude', float_fpe),
        ('longitude', float_fpe),
        ('the_dude', digits_fpe),
        ('the_sci_notation', float_fpe),
        ('the_hotness', text_fpe),
    ]
    data_paths = [
        DataPath(input=name, xforms=xforms) for name, xforms in field_plan
    ]
    forward = DataTransformPipeline(data_paths)
    backward = DataRestorePipeline(data_paths)
    encrypted = forward.transform_record(original)

    # Checked in this exact order; values are the known outputs for the secret.
    expectations = [
        ('credit_card', '5931468769662449'),
        ('longitude', -1.32547939979e-312),
        ('latitude', 1.32547939979e-312),
        ('the_hotness', '2qjuxg7ju'),
        ('the_dude', 128994144),
        ('the_sci_notation', 1.229570610794763e-07),
    ]
    for field_name, expected in expectations:
        assert encrypted.get(field_name) == expected

    restored = backward.transform_record(encrypted)
    assert restored == original
def test_record_output_map_and_schemas():
    """Check ``DataPath`` output renaming, with and without transforms.

    For each entry of ``RECORD_KEYS`` (plus ``None``) the same flat record is
    transformed. Fields ``a``/``b`` are renamed to ``x``/``y`` untouched,
    ``c`` is encrypted and renamed to ``z``, ``d``/``e`` are encrypted in
    place. The restored payload must equal the input. A second pipeline maps
    both ``a`` and ``f`` onto ``x`` and must produce equal output for two
    records that differ only in that field name.
    """
    secret = "2B7E151628AED2A6ABF7158809CF4F3CEF4359D8D580AA4F7F036D6F04FC6A94"
    rec = {'a': 1.23, 'b': 2.34, 'c': 3.45, 'd': 4.56, 'e': 5.67}
    rec2 = {'f': 1.23, 'b': 2.34, 'c': 3.45, 'd': 4.56, 'e': 5.67}

    test_payloads = [(rec, key) for key in RECORD_KEYS]
    test_payloads.append((rec, None))

    for payload, record_key in test_payloads:
        float_fpe = FpeFloatConfig(secret=secret, radix=10)
        data_paths = [
            DataPath(input='a', output='x'),
            DataPath(input='b', output='y'),
            DataPath(input='c', xforms=float_fpe, output='z'),
            DataPath(input='d', xforms=float_fpe),
            DataPath(input='e', xforms=float_fpe),
            DataPath(input='*'),
        ]
        forward = DataTransformPipeline(data_paths)
        backward = DataRestorePipeline(data_paths)

        xf_payload = forward.transform_record(payload)
        # Use the nested record when the payload has one under record_key,
        # otherwise the payload itself is the record.
        xf_record = xf_payload.get(record_key) or xf_payload

        expectations = [
            ('x', 1.23),
            ('y', 2.34),
            ('z', 3.590038584114511),
            ('d', 7.002521213914073),
            ('e', 4.9570355284951875),
        ]
        for field_name, expected in expectations:
            assert xf_record.get(field_name) == expected

        restored = backward.transform_record(xf_payload)
        restored = restored.get(record_key) or restored
        assert restored == rec

        # test multiple names mapping to the same output field
        string_fpe = FpeStringConfig(secret=secret, radix=10)
        data_paths = [
            DataPath(input='a', xforms=string_fpe, output='x'),
            DataPath(input='f', xforms=string_fpe, output='x'),
            DataPath(input='b', xforms=string_fpe, output='y'),
            DataPath(input='c', xforms=string_fpe, output='z'),
            DataPath(input='*'),
        ]
        forward = DataTransformPipeline(data_paths)
        xf_payload = forward.transform_record(rec)
        xf_payload2 = forward.transform_record(rec2)
        xf_record = xf_payload.get(record_key) or xf_payload
        xf_record2 = xf_payload2.get(record_key) or xf_payload2
        assert xf_record == xf_record2
def test_fpe_dirty_transform(record_dirty_fpe_check):
    """Run radix-10 FPE over ``Credit Card`` and ``Customer ID`` and restore.

    The expected transformed values contain spaces and punctuation alongside
    digits. All other fields pass through the ``'*'`` path, and the restored
    record must equal the fixture.
    """
    digits_fpe = FpeStringConfig(
        secret="2B7E151628AED2A6ABF7158809CF4F3CEF4359D8D580AA4F7F036D6F04FC6A94",
        radix=10,
    )
    data_paths = [
        DataPath(input='Credit Card', xforms=digits_fpe),
        DataPath(input='Customer ID', xforms=digits_fpe),
        DataPath(input='*'),
    ]
    forward = DataTransformPipeline(data_paths)
    backward = DataRestorePipeline(data_paths)

    encrypted = forward.transform_record(record_dirty_fpe_check)
    assert encrypted['Credit Card'] == '447158 5942734 458'
    assert encrypted['Customer ID'] == '747/52*232 83-19'

    round_tripped = backward.transform_record(encrypted)
    assert round_tripped == record_dirty_fpe_check
def test_meta_data_transform(record_meta_data_check):
    """Apply label-driven transforms to every field via a single ``'*'`` path.

    Asserts the transformed credit card in both the record and its metadata
    label text, the span of the first Country label, and that restoring
    yields the original credit card.
    """
    secret = "2B7E151628AED2A6ABF7158809CF4F3CEF4359D8D580AA4F7F036D6F04FC6A94"
    expected_card = '4471585942734458'

    entity_xf = [
        RedactWithLabelConfig(labels=['date']),
        SecureHashConfig(secret='rockybalboa', labels=['location']),
        FpeStringConfig(labels=['credit_card_number'], secret=secret, radix=10),
    ]
    data_paths = [DataPath(input='*', xforms=entity_xf)]
    forward = DataTransformPipeline(data_paths)
    backward = DataRestorePipeline(data_paths)

    transformed = forward.transform_record(record_meta_data_check)
    assert transformed['record']['Credit Card'] == expected_card
    assert transformed['metadata']['fields']['Credit Card']['ner']['labels'][0]['text'] == expected_card
    assert transformed['metadata']['fields']['Country']['ner']['labels'][0]['start'] == 0
    assert transformed['metadata']['fields']['Country']['ner']['labels'][0]['end'] == 64

    restored = backward.transform_record(transformed)
    original_card = record_meta_data_check['record']['Credit Card']
    assert restored['record']['Credit Card'] == original_card
def test_pipe_date_shift_cbc_fast(records_date_tweak):
    """Check ``DateShiftConfig`` with and without a ``user_id`` tweak.

    In the first half ``user_id`` is encrypted with FPE in ``CBC_FAST`` mode
    and referenced as the date-shift tweak. In the second half only
    ``created`` is shifted, without a tweak. Both halves assert the shifted
    date for the first two fixture records and the date obtained after
    restoring.
    """
    secret = '2B7E151628AED2A6ABF7158809CF4F3CEF4359D8D580AA4F7F036D6F04FC6A94'

    # run tests with user_id to tweak the de-identified date
    user_id_fpe = FpeStringConfig(
        secret=secret, radix=10, aes_mode=crypto_aes.Mode.CBC_FAST)
    tweaked_shift = DateShiftConfig(
        secret=secret,
        lower_range_days=-10,
        upper_range_days=25,
        tweak=FieldRef('user_id'),
    )
    data_paths = [
        DataPath(input='user_id', xforms=user_id_fpe),
        DataPath(input='created', xforms=tweaked_shift),
        DataPath(input='*'),
    ]
    forward = DataTransformPipeline(data_paths)
    backward = DataRestorePipeline(data_paths)

    shifted_aw = forward.transform_record(records_date_tweak[0])
    shifted_ae = forward.transform_record(records_date_tweak[1])
    assert shifted_aw['created'] == '2016-06-18'
    assert shifted_ae['created'] == '2016-06-18'

    restored_ae = backward.transform_record(shifted_ae)
    restored_aw = backward.transform_record(shifted_aw)
    assert restored_aw['created'] == '2016-06-17'
    assert restored_ae['created'] == '2016-06-17'

    # run tests without tweaking the de-identified date
    plain_shift = DateShiftConfig(
        secret=secret, lower_range_days=-10, upper_range_days=25)
    data_paths = [DataPath(input='created', xforms=plain_shift)]
    forward = DataTransformPipeline(data_paths)
    backward = DataRestorePipeline(data_paths)

    shifted_aw = forward.transform_record(records_date_tweak[0])
    shifted_ae = forward.transform_record(records_date_tweak[1])
    assert shifted_aw['created'] == '2016-06-13'
    assert shifted_ae['created'] == '2016-06-13'

    restored_aw = backward.transform_record(shifted_aw)
    restored_ae = backward.transform_record(shifted_ae)
    assert restored_aw['created'] == '2016-06-17'
    assert restored_ae['created'] == '2016-06-17'
"""
Basic Format Preserving Encryption

Encrypts a credit card number (radix 10, with a ``StringMask(start_pos=1)``)
and a name (radix 62) through a ``DataTransformPipeline`` and checks the
result against known values.
"""
from gretel_client.transformers import FpeStringConfig
from gretel_client.transformers import DataPath, DataTransformPipeline
from gretel_client.transformers.string_mask import StringMask

# Shared secret used by both FPE configurations below.
SECRET = "2B7E151628AED2A6ABF7158809CF4F3CEF4359D8D580AA4F7F036D6F04FC6A94"

mask = StringMask(start_pos=1)

# Digits-only encryption for the card number, using the mask defined above.
xf = FpeStringConfig(secret=SECRET, radix=10, mask=[mask])

# Radix-62 encryption for the name.
xf2 = FpeStringConfig(secret=SECRET, radix=62)

paths = [
    DataPath(input="credit_card", xforms=xf),
    DataPath(input="name", xforms=xf2),
    DataPath(input="*"),
]
pipe = DataTransformPipeline(paths)

rec = {"name": "John Doe", "credit_card": "4123 5678 9012 3456"}
out = pipe.transform_record(rec)

assert out == {
    "name": "2DZv ZmN",
    "credit_card": "4521 1021 2994 9272",
}
DataPath, DataTransformPipeline, DataRestorePipeline, ) rec = { "Address": "317 Massa. Av.", "City": "Didim", "Country": "Eritrea", "Credit Card": "601128 2195205 818", "Customer ID": "169/61*009 38-34", "Date": "2019-10-08", "Name": "Grimes, Bo H.", "Zipcode": "745558", } field_xf = FpeStringConfig( secret="2B7E151628AED2A6ABF7158809CF4F3CEF4359D8D580AA4F7F036D6F04FC6A94", radix=10) data_paths = [ DataPath(input="Credit Card", xforms=field_xf), DataPath(input="Customer ID", xforms=field_xf), DataPath(input="*"), ] xf = DataTransformPipeline(data_paths) rf = DataRestorePipeline(data_paths) transformed = xf.transform_record(rec) assert transformed["Credit Card"] == "447158 5942734 458" assert transformed["Customer ID"] == "747/52*232 83-19" restored = rf.transform_record(transformed) assert restored == rec