Пример #1
0
def anony_txt(txtFile):
    """Anonymize a tab-separated English/Russian parallel text file.

    Every line of ``txtFile`` is split on tab characters. The first field is
    passed through an ``en_US`` anonymizer chain and the second field (with
    surrounding whitespace stripped) through a ``ru_RU`` chain.
    ``cfg.NERnumber`` is reset to 0 before each side is anonymized and read
    back afterwards; a line is accepted only when both readings are equal.

    Two files are written, named by dropping the last four characters of
    ``txtFile`` (so a four-character extension such as ``.txt`` is assumed):

    * ``<stem>_anonymized.txt``: the anonymized pair for accepted lines, the
      original line unchanged for rejected ones.
    * ``<stem>_rejected_anonymization.txt``: the anonymized pair of every
      rejected line.

    Args:
        txtFile: Path of the input file, decoded as UTF-8 with an optional
            byte-order mark.

    Raises:
        IndexError: If a line contains no tab character.
    """
    anon_en = AnonymizerChain(Anonymization('en_US'))
    anon_en.add_anonymizers(EmailAnonymizer, UriAnonymizer,
                            MacAddressAnonymizer, PhoneNumberAnonymizer,
                            INNAnonymizer, NamedEntitiesAnonymizer4('en'))

    anon_ru = AnonymizerChain(Anonymization('ru_RU'))
    anon_ru.add_anonymizers(EmailAnonymizer, UriAnonymizer,
                            MacAddressAnonymizer, PhoneNumberAnonymizer,
                            INNAnonymizer, NamedEntitiesAnonymizer4('ru'))

    # Strips the last four characters; assumes an extension such as ".txt".
    stem = txtFile[:-4]

    # One `with` owns all three handles so the output files are closed even
    # when anonymization or line parsing raises part-way through the input.
    with open(txtFile, 'r', encoding='utf-8-sig') as f, \
            open(stem + "_anonymized.txt", 'w', encoding='utf-8') as anfile, \
            open(stem + "_rejected_anonymization.txt", 'w',
                 encoding='utf-8') as rejfile:
        for line in f:
            fields = line.split("\t")

            # NOTE(review): cfg.NERnumber is presumably incremented by the
            # named-entity anonymizer while it runs -- confirm in cfg /
            # NamedEntitiesAnonymizer4. It is reset before each side so the
            # two readings can be compared.
            cfg.NERnumber = 0
            anline_en = anon_en.anonymize(fields[0])
            NERnumber_en = cfg.NERnumber

            cfg.NERnumber = 0
            anline_ru = anon_ru.anonymize(fields[1].strip())
            NERnumber_ru = cfg.NERnumber

            anonymized_pair = anline_en + "\t" + anline_ru + "\n"
            if NERnumber_en == NERnumber_ru:
                anfile.write(anonymized_pair)
            else:
                # Readings differ: keep the source line as-is in the main
                # output and record the attempted anonymization separately.
                anfile.write(line)
                rejfile.write(anonymized_pair)
Пример #2
0
 def get_anonymize_text(self, query):
     """Return ``query`` after running it through an ``en_US`` anonymizer chain.

     A new chain is built on every call and loaded with the file-path,
     e-mail, URI, MAC-address, IPv4, IPv6 and named-entity (``'en'``)
     anonymizers, in that order.
     """
     chain = AnonymizerChain(Anonymization('en_US'))
     anonymizers = (
         FilePathAnonymizer,
         EmailAnonymizer,
         UriAnonymizer,
         MacAddressAnonymizer,
         Ipv4Anonymizer,
         Ipv6Anonymizer,
         NamedEntitiesAnonymizer('en'),
     )
     chain.add_anonymizers(*anonymizers)
     return chain.anonymize(query)
Пример #3
0
from anonymization import Anonymization

# Sample input printed through the custom anonymizer at the end of the script.
text = "This is a message to Marco"


class MarcoAnonymizer():
    """Replace every occurrence of ``Marco`` or ``marco`` with a star (``*``).

    Only these two spellings are matched; other casings such as ``MARCO``
    are left untouched. The match is a plain substring match, so the name is
    also replaced when it appears inside a longer word.
    """

    def __init__(self, anonymization: Anonymization):
        # Stored on the instance; anonymize() below does not read it.
        self.anonymization = anonymization

    def anonymize(self, text: str) -> str:
        """Return ``text`` with ``Marco`` and ``marco`` replaced by ``*``."""
        # Both spellings are handled so the behaviour matches the class
        # contract; the two replacements cannot interfere with each other.
        return text.replace('Marco', '*').replace('marco', '*')


# Anonymization is constructed with None here, where the other examples pass
# a locale string; MarcoAnonymizer only stores the object it is given.
anon = Anonymization(None)
marcoAnonymizer = MarcoAnonymizer(anon)
# Print the sample text after the custom anonymizer has processed it.
print(marcoAnonymizer.anonymize(text))
Пример #4
0
from anonymization import Anonymization, AnonymizerChain, EmailAnonymizer, NamedEntitiesAnonymizer

# Sample message with a first name, a product name and an e-mail address.
# The address must be syntactically valid, otherwise EmailAnonymizer has
# nothing to match and the example only exercises the named-entity step.
text = "Hi John,\nthanks for you for subscribing to Superprogram, feel free to ask me any question at secret.mail@Superprogram.com \n Superprogram the best program!"
anon = AnonymizerChain(Anonymization('en_US'))
anon.add_anonymizers(EmailAnonymizer, NamedEntitiesAnonymizer('en_core_web_lg'))
# pseudonymize() returns the transformed text together with a patch object
# that revert() takes back below.
clean_text, patch = anon.pseudonymize(text)

print(clean_text)
print(patch)

revert_text = anon.revert(clean_text, patch)

# Prints True when reverting reproduces the original message exactly.
print(text == revert_text)
Пример #5
0
from anonymization import Anonymization, PhoneNumberAnonymizer

# French sample sentence containing the digit string to be anonymized.
text = "C'est bien le 0611223344 ton numéro ?"
# The Anonymization object is created with the French locale.
anon = Anonymization('fr_FR')
# PhoneNumberAnonymizer is used on its own here, without an AnonymizerChain.
phoneAnonymizer = PhoneNumberAnonymizer(anon)
print(phoneAnonymizer.anonymize(text))