def anony_txt(txtFile):
    """Anonymize a tab-separated, two-column text file line by line.

    Column 0 of every line is passed through an ``en_US`` anonymizer chain
    and column 1 (stripped of surrounding whitespace) through a ``ru_RU``
    chain. ``cfg.NERnumber`` is reset to 0 before each side is processed and
    read back afterwards; the two readings are compared to decide whether
    the anonymized pair is accepted.
    NOTE(review): ``cfg.NERnumber`` is presumably incremented by
    ``NamedEntitiesAnonymizer4`` for each replaced entity -- confirm, that
    code is not visible here.

    Two files are written next to the input, named from ``txtFile`` with its
    last four characters removed (i.e. a three-letter extension such as
    ``.txt`` is assumed):

    * ``<base>_anonymized.txt`` -- the anonymized pair when both counters
      match, otherwise the original line unchanged.
    * ``<base>_rejected_anonymization.txt`` -- the anonymized pair of every
      line whose counters did not match.

    Args:
        txtFile: Path of the UTF-8 (optionally BOM-prefixed) input file.

    Raises:
        IndexError: If a line contains no tab character.
        OSError: If the input or either output file cannot be opened.
    """
    anon_en = AnonymizerChain(Anonymization('en_US'))
    anon_en.add_anonymizers(
        EmailAnonymizer, UriAnonymizer, MacAddressAnonymizer,
        PhoneNumberAnonymizer, INNAnonymizer, NamedEntitiesAnonymizer4('en'),
    )
    anon_ru = AnonymizerChain(Anonymization('ru_RU'))
    anon_ru.add_anonymizers(
        EmailAnonymizer, UriAnonymizer, MacAddressAnonymizer,
        PhoneNumberAnonymizer, INNAnonymizer, NamedEntitiesAnonymizer4('ru'),
    )

    base = txtFile[:-4]
    # A single ``with`` guarantees all three handles are closed (and the
    # output buffers flushed) even if anonymization raises midway through.
    # The input is opened first so a missing input creates no output files.
    with open(txtFile, 'r', encoding='utf-8-sig') as f, \
            open(base + "_anonymized.txt", 'w', encoding='utf-8') as anfile, \
            open(base + "_rejected_anonymization.txt", 'w',
                 encoding='utf-8') as rejfile:
        for line in f:
            columns = line.split("\t")

            # Reset the shared counter before each side so the two readings
            # are independent of each other and of previous lines.
            cfg.NERnumber = 0
            anline_en = anon_en.anonymize(columns[0])
            NERnumber_en = cfg.NERnumber

            cfg.NERnumber = 0
            anline_ru = anon_ru.anonymize(columns[1].strip())
            NERnumber_ru = cfg.NERnumber

            anonymized_pair = anline_en + "\t" + anline_ru + "\n"
            if NERnumber_en == NERnumber_ru:
                anfile.write(anonymized_pair)
            else:
                # Counters disagree: keep the original line in the main
                # output and record the anonymized attempt separately.
                anfile.write(line)
                rejfile.write(anonymized_pair)
def get_anonymize_text(self, query):
    """Return ``query`` after running it through an ``en_US`` anonymizer chain.

    A fresh ``AnonymizerChain`` is built on every call and loaded with the
    file-path, e-mail, URI, MAC-address, IPv4, IPv6 and named-entity
    (``'en'``) anonymizers, in that order.

    Args:
        query: The text to anonymize.

    Returns:
        Whatever ``AnonymizerChain.anonymize`` returns for ``query``.
    """
    chain = AnonymizerChain(Anonymization('en_US'))
    anonymizers = (
        FilePathAnonymizer,
        EmailAnonymizer,
        UriAnonymizer,
        MacAddressAnonymizer,
        Ipv4Anonymizer,
        Ipv6Anonymizer,
        NamedEntitiesAnonymizer('en'),
    )
    chain.add_anonymizers(*anonymizers)
    return chain.anonymize(query)
import re

from anonymization import Anonymization

text = "This is a message to Marco"


class MarcoAnonymizer:
    """Replace all occurrences of Marco or marco with a star (*)."""

    # Matches both capitalisations named in the class contract; compiled
    # once at class creation rather than on every call.
    _MARCO_PATTERN = re.compile(r'[Mm]arco')

    def __init__(self, anonymization: Anonymization):
        """Store the ``Anonymization`` instance.

        The instance is kept on ``self.anonymization`` but is not used by
        ``anonymize`` in this class.
        """
        self.anonymization = anonymization

    def anonymize(self, text: str) -> str:
        """Return ``text`` with every ``Marco`` / ``marco`` replaced by ``*``."""
        return self._MARCO_PATTERN.sub('*', text)


anon = Anonymization(None)
marcoAnonymizer = MarcoAnonymizer(anon)
print(marcoAnonymizer.anonymize(text))
from anonymization import Anonymization, AnonymizerChain, EmailAnonymizer, NamedEntitiesAnonymizer

# Sample message containing a person name and an e-mail address.
# NOTE(review): the address was previously the literal "[email protected]"
# (an e-mail-obfuscation artifact), which is not a valid address for
# EmailAnonymizer to match. The original address is unknown; an address on
# the reserved example.com domain is used as a stand-in.
text = "Hi John,\nthanks for you for subscribing to Superprogram, feel free to ask me any question at contact@example.com \n Superprogram the best program!"

# Chain an e-mail anonymizer with a named-entity anonymizer that is
# configured with the 'en_core_web_lg' model name.
anon = AnonymizerChain(Anonymization('en_US'))
anon.add_anonymizers(EmailAnonymizer, NamedEntitiesAnonymizer('en_core_web_lg'))

# pseudonymize() returns the rewritten text together with a patch object
# that revert() accepts to undo the replacements.
clean_text, patch = anon.pseudonymize(text)

print(clean_text)
print(patch)

# Round trip: prints True when reverting reproduces the original text.
revert_text = anon.revert(clean_text, patch)

print(text == revert_text)
from anonymization import (
    Anonymization,
    PhoneNumberAnonymizer,
)

# French sample sentence containing a ten-digit phone number.
text = "C'est bien le 0611223344 ton numéro ?"

# Build the anonymizer on top of an Anonymization configured for 'fr_FR'.
anon = Anonymization('fr_FR')
phoneAnonymizer = PhoneNumberAnonymizer(anon)

# Run the sentence through the anonymizer and show the result.
anonymized_text = phoneAnonymizer.anonymize(text)
print(anonymized_text)