Exemplo n.º 1
0
    def __init__(
            self,
            # 16 or 32 byte key
            key,
            nonce=None,
            mode=AES_MODE_EAX,
            text_encoding='utf-8'):
        """
        Store the key, cipher mode, nonce and text encoding for later use.

        :param key: the encryption key (16 or 32 bytes); must support len()
        :param nonce: nonce to use; when None a random printable one of
            AES_Encrypt.SIZE_NONCE bytes is generated
        :param mode: AES_Encrypt.AES_MODE_EAX or AES_Encrypt.AES_MODE_CBC
        :param text_encoding: text encoding name kept on the instance
        :raises Exception: if mode is not one of the two supported modes
        """
        self.key = key
        # Key material must never reach the logs, only its size is reported
        Log.debug('Using key <hidden>. Size = ' + str(len(self.key)) + '.')

        # Translate our own mode constant to the AES library constant
        self.cipher_mode_str = mode
        if self.cipher_mode_str == AES_Encrypt.AES_MODE_EAX:
            self.cipher_mode = AES.MODE_EAX
        elif self.cipher_mode_str == AES_Encrypt.AES_MODE_CBC:
            self.cipher_mode = AES.MODE_CBC
        else:
            raise Exception(
                str(self.__class__) + ' ' +
                str(getframeinfo(currentframe()).lineno) +
                ': Unsupported AES mode "' + str(self.cipher_mode_str) + '"')

        if nonce is None:
            # Must be 16 bytes
            nonce = AES_Encrypt.generate_random_bytes(
                size=AES_Encrypt.SIZE_NONCE, printable=True)

        self.nonce = nonce
        Log.debug(
            str(self.__class__) + ' ' +
            str(getframeinfo(currentframe()).lineno) + ': Using nonce "' +
            str(self.nonce) + '". Size = ' + str(len(self.nonce)))

        self.text_encoding = text_encoding
Exemplo n.º 2
0
    def convert_ascii_string_to_other_alphabet(
        ascii_char_string,
        # Default to CJK Unicode Block
        unicode_range=BLOCK_CHINESE,
        # If the characters come from a hexdigest from a hash, we can compress 4 times,
        # otherwise for a random ascii string, we can only compress 2 characters to 1 chinese.
        group_n_char=2):
        """
        Map groups of input characters onto characters of a Unicode block.

        The input is right-padded with '0' to a multiple of 4 characters and
        then cut into consecutive groups of group_n_char characters. Every
        group is turned into an integer, reduced modulo the size of the block
        and offset by the first code point of the block.

        :param ascii_char_string: string to convert
        :param unicode_range: indexable pair (first code point, last code point),
            both inclusive
        :param group_n_char: 2 to build the group value from the character
            ordinals (8 bits each, first character most significant), 4 to
            parse the group as a hexadecimal number. Any other value yields
            no output characters.
        :return: the converted string
        """
        block_first = unicode_range[0]
        uni_len = unicode_range[1] - block_first + 1

        # Right-pad with '0' up to the next multiple of 4 characters
        remainder = len(ascii_char_string) % 4
        if remainder != 0:
            ascii_char_string = ascii_char_string + '0' * (4 - remainder)

        converted_chars = []
        n_groups = int(len(ascii_char_string) / group_n_char)
        for group_no in range(n_groups):
            idx_start = group_n_char * group_no
            idx_end = idx_start + group_n_char
            s = ascii_char_string[idx_start:idx_end]

            if group_n_char == 2:
                ord_arr = np.array([ord(c) for c in s])
                # Weights 256^(k-1), ..., 256^0 so the first character is most significant
                weights = np.array(
                    [2**(8 * (pos - 1)) for pos in range(len(ord_arr), 0, -1)])
                val = np.sum(ord_arr * weights)
                Log.debug('Index start=' + str(idx_start) + ', end=' +
                          str(idx_end) + ', s=' + str(s) + ', ordinal=' +
                          str(ord_arr) + ', val=' + str(hex(val)))
                converted_chars.append(chr((val % uni_len) + block_first))
            elif group_n_char == 4:
                Log.debug('Index start=' + str(idx_start) + ', end=' +
                          str(idx_end) + ', s=' + str(s))
                # The group is expected to be hexadecimal digits
                n = int('0x' + str(s), 16)
                mapped_char = chr((n % uni_len) + block_first)
                converted_chars.append(mapped_char)
                Log.debugdebug('From ' + str(idx_start) + ': ' + str(s) +
                               ', n=' + str(n) + ', char=' +
                               str(mapped_char))

        return ''.join(converted_chars)
Exemplo n.º 3
0
    def run_unit_test(self):
        """
        Check the conversion of hashes and plain ASCII strings to CJK characters.

        The first set hashes a fixed sentence with several algorithms and
        converts each hex digest 4 characters at a time. The second set
        converts arbitrary ASCII strings with the default grouping.

        :return: the ut.ResultObj accumulating pass/fail counts
        """
        res_final = ut.ResultObj(count_ok=0, count_fail=0)

        s = '니는 먹고 싶어'
        hash_cases = (
            (Hash.ALGO_SHA1, '蔮膫圈嫩慁覕邜蹋妡狿'),
            (Hash.ALGO_SHA256, '葶杊閹翔綐僤徼戻髯鼚胦嘭藃诠灑浽'),
            (Hash.ALGO_SHA512, '詐鏙仟墍例嵝烐檦蝡溲薑珇鸦東燢爻纷欜陲囚劚攠菜槑茹輀濯偑袁蓣质簨'),
            (Hash.ALGO_SHA3_256, '厥驹踸鸨揱澯鑢擠鳰僸覑儽悃徵絨控'),
            (Hash.ALGO_SHA3_512, '醜怅僒础衺菼惓隔鮚腋釔晞鏙屜咖龩檵因伖蘦惌灱騾凊纅弪鮾蕏解铦欪臓'),
        )
        for algo, expected in hash_cases:
            # In Linux command line, echo -n "$s" | shasum -a 1 (or 256,512)
            Log.debug('Using algo "' + str(algo) + '":')
            hstr = Hash.hash(string=s, algo=algo)
            Log.debug('Hash: ' + str(hstr))
            observed = Hash.convert_ascii_string_to_other_alphabet(
                ascii_char_string=hstr,
                group_n_char=4)
            comment = 'test string "' + str(hstr) + '" got "' + str(observed) + '"'
            passed = ut.UnitTest.assert_true(
                observed=observed,
                expected=expected,
                test_comment=comment)
            res_final.update_bool(res_bool=passed)

        ascii_cases = (
            ('abc/ii{}.!&%[][\\+=', '嵢弯敩睽簡琥坝坜礽縰'),
            ('8829amsf)(*&^%^*./', '蘸耹嵭潦眨砦娥娪簯縰'),
        )
        for ascii_string, expected in ascii_cases:
            observed = Hash.convert_ascii_string_to_other_alphabet(
                ascii_char_string=ascii_string)
            comment = 'test string "' + str(ascii_string) + '" got "' + str(observed) + '"'
            passed = ut.UnitTest.assert_true(
                observed=observed,
                expected=expected,
                test_comment=comment)
            res_final.update_bool(res_bool=passed)

        return res_final
Exemplo n.º 4
0
    def xor_string(
            self,
            s1,
            s2
    ):
        """
        XOR two strings after stretching the shorter one to the longer one's length.

        The shorter string is extended by repeating its own characters from
        the start. Both strings are then encoded with
        Obfuscate.STRING_ENCODING and passed to self.xor_bytes().

        :param s1: first string
        :param s2: second string
        :return: whatever self.xor_bytes() returns for the two encoded strings
        """
        Log.debug(
            str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno)
            + ': XOR between "' + str(s1) + '" and "' + str(s2) + '".'
        )

        target_len = max(len(s1), len(s2))

        # Stretch the shorter string by re-reading it from its first character.
        # Reading from the growing string makes the content repeat cyclically.
        read_pos = 0
        while len(s1) < target_len:
            s1 += s1[read_pos]
            read_pos += 1
        read_pos = 0
        while len(s2) < target_len:
            s2 += s2[read_pos]
            read_pos += 1

        Log.debug(
            str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
            + ': After appending, XOR between "' + str(s1) + '" and "' + str(s2) + '".'
        )

        Log.debugdebug(
            str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
            + ': s1 "' + str(s1) + '", s2 "' + str(s2) + '"'
        )

        return self.xor_bytes(
            b1 = bytes(s1, encoding=Obfuscate.STRING_ENCODING),
            b2 = bytes(s2, encoding=Obfuscate.STRING_ENCODING)
        )
Exemplo n.º 5
0
    def xor_bytes(
            self,
            b1,
            b2
    ):
        """
        XOR two byte sequences position by position.

        The sequences are paired with zip(), so the result is as long as the
        shorter input.

        :param b1: first sequence of integer byte values
        :param b2: second sequence of integer byte values
        :return: list of integers, one XOR result per paired position
        """
        res_xor = []
        for left, right in zip(b1, b2):
            byte_xor = left ^ right
            Log.debugdebug(
                str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                + 'XOR "' + str(hex(left)) + '" and "' + str(hex(right)) + '" = ' + str(hex(byte_xor))
            )
            res_xor.append(byte_xor)

        # Summary of the whole operation in hex form
        hex_b1 = str(self.hexdigest(b1))
        hex_b2 = str(self.hexdigest(b2))
        hex_res = str(self.hexdigest(res_xor))
        Log.debug(
            str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
            + ': XOR between "' + hex_b1
            + '" and "' + hex_b2
            + '" = "' + hex_res + '"'
        )

        return res_xor
Exemplo n.º 6
0
    def run_unit_test(self):
        """
        Round-trip test: encrypt then decrypt sentences in CBC and EAX modes.

        Each sentence (several scripts plus one random 10000 character string)
        is encrypted with a fixed key and nonce, decrypted again and compared
        with the original.

        :return: the ut.ResultObj accumulating pass/fail counts
        """
        res_final = ut.ResultObj(count_ok=0, count_fail=0)

        long_str = ''.join(
            random.choice(AES_Encrypt.CHARS_STR) for _ in range(10000)
        )
        sentences = [
            '니는 먹고 싶어', 'Дворянское ГНЕЗДО', '没问题 大陆 经济',
            '存款方式***2019-12-11 11:38:46***', '1234567890123456', long_str
        ]

        key = b'Sixteen byte key'
        nonce = b'0123456789xxyyzz'

        for mode in (AES_Encrypt.AES_MODE_CBC, AES_Encrypt.AES_MODE_EAX):
            # The 16 byte key is doubled to get a 32 byte key
            aes_obj = AES_Encrypt(key=key + key, mode=mode, nonce=nonce)
            for s in sentences:
                Log.debug('Encrypting "' + str(s) + '"')
                data_bytes = s.encode(encoding=STR_ENCODING)
                Log.debug('Data length in bytes = ' + str(len(data_bytes)))
                ciphertext = aes_obj.encode(data=data_bytes).ciphertext_b64
                Log.debug('Encrypted as "' + str(ciphertext) + '"')

                plaintext = aes_obj.decode(ciphertext=ciphertext)
                Log.debug('Decrypted as "' + plaintext + '"')

                comment = (
                    'mode "' + str(mode) + '" s=' + str(s)
                    + '" encrypted to "' + str(ciphertext)
                    + '", decrypted back to "' + str(plaintext)
                )
                passed = ut.UnitTest.assert_true(
                    observed=plaintext,
                    expected=s,
                    test_comment=comment)
                res_final.update_bool(res_bool=passed)

        return res_final
Exemplo n.º 7
0
Arquivo: Hide.py Projeto: mapktah/hide
    def hide_data(
            self,
            # In string JSON
            records_json,
            # Column names to hide
            hide_colname,
            encrypt_key_b64,
            nonce_b64        = None,
            is_number_only   = False,
            case_sensitive   = False,
            # We support processing only China for now
            process_phone_country = None,
            hash_encode_lang = 'zh',
    ):
        """
        Clean one column of the records and add hidden representations of it.

        The following columns are added to the records, all prefixed with
        the name of the hidden column:
          <col>_clean            trimmed (optionally lowercased / digits only) value
          <col>_last4char        '***' followed by the last characters of the clean value
          <col>_sha256           SHA256 hash of the clean value
          <col>_sha256_readable  the hash converted to Chinese or Korean characters
          <col>_encrypt          dict with cipher mode, ciphertext, tag and iv (base64)

        NOTE(review): several log calls below print sample rows that include
        the raw and cleaned values of the column being hidden. Confirm the
        log destination is allowed to hold that data.

        :param records_json: records as a JSON string, or an object already
            accepted by pd.DataFrame() and sliceable like a list
        :param hide_colname: name of the column to hide
        :param encrypt_key_b64: encryption key, base64 encoded string
        :param nonce_b64: optional nonce, base64 encoded string. When None or
            not decodable, None is passed to AES_Encrypt
        :param is_number_only: when True every character outside 0-9 is removed
        :param case_sensitive: when False values are lowercased
        :param process_phone_country: only 'china' triggers phone number filtering
        :param hash_encode_lang: 'ko' for the Korean syllable block, anything
            else uses the Chinese block
        :return: the records with the added columns as a JSON string
            (orient 'records'). If records_json is a string that cannot be
            parsed, the error message string is returned instead.
        :raises Exception: if the encryption key cannot be base64 decoded
        """
        step = 0

        if isinstance(records_json, str):
            try:
                records_json = json.loads(
                    records_json
                )
            except Exception as ex_json:
                errmsg = \
                    str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno) \
                    + ': Exception loading json: ' + str(records_json)\
                    + '. Got exception: ' + str(ex_json)
                Log.error(errmsg)
                # Callers receive the error text instead of the result JSON
                return errmsg

        colname_clean            = str(hide_colname) + '_clean'
        colname_last4char        = str(hide_colname) + '_last4char'
        colname_hash             = str(hide_colname) + '_sha256'
        colname_hash_readable    = str(hide_colname) + '_sha256_readable'
        colname_encrypt          = str(hide_colname) + '_encrypt'

        df = pd.DataFrame(records_json)
        Log.debug(
            str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
            + ': Converted json object (first 20 records): '
            + str(records_json[0:min(20,len(records_json))])
            + ' to data frame: ' + str(df)
        )

        Log.important(
            str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
            + ': Start processing records, hide column "' + str(hide_colname)
            + '". Records of sample rows' +  str(records_json[0:min(10,len(records_json))])
        )

        #
        # Basic cleaning
        #  - Always trim, optionally lowercase, optionally keep digits only
        #
        step += 1
        start_filter_time = Profiling.start()
        def filter_col(
                x,
                is_number_only = False,
                case_sensitive = False
        ):
            try:
                # We always trim no matter what
                x = StringUtils.trim(str(x))
                if not case_sensitive:
                    x = x.lower()
                if is_number_only:
                    x = re.sub(pattern='[^0-9]', repl='', string=x)
                return x
            except Exception as ex_clean:
                # Best effort: keep the value as it was when cleaning fails
                Log.error(
                    str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                    + ': Error cleaning "' + str(x) + '". ' + str(ex_clean)
                )
                return x
        df[colname_clean] = df[hide_colname].apply(filter_col, args=(is_number_only, case_sensitive))
        Log.important(
            str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
            + ': Step ' + str(step) + ': BASIC CLEANING Took '
            + str(Profiling.get_time_dif_secs(start=start_filter_time, stop=Profiling.stop(), decimals=2))
            + ' secs. Successfully cleaned column "' + str(hide_colname)+
            '", case sensitive "' + str(case_sensitive)
            + '", is number "' + str(is_number_only)
            + '", sample rows: ' + str(df[0:2])
        )

        #
        # Process Phone Number by Country
        #
        step += 1
        start_phone_time = Profiling.start()
        def process_phone(
                x,
                country
        ):
            try:
                if country == 'china':
                    return PhoneNumber.filter_phone_china(x)
                else:
                    Log.error(
                        str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                        + ': Unsupported country "' + str(country) + '"'
                    )
                    return x
            except Exception as ex:
                # Best effort: keep the value as it was when processing fails
                Log.error(
                    str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                    + ': Exception processing phone "' + str(x) + '". Exception ' + str(ex)
                )
                return x

        if process_phone_country == 'china':
            df[colname_clean] = df[colname_clean].apply(process_phone, args=[process_phone_country])
            Log.important(
                str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                + ': Step ' + str(step) + ': PHONE CLEANING Took '
                + str(Profiling.get_time_dif_secs(start=start_phone_time, stop=Profiling.stop(), decimals=2))
                + ' secs. Successfully processed phone for column "' + str(hide_colname)
                + '", sample rows: ' + str(df[0:2])
            )

        #
        # Extract last 4 characters
        #
        step += 1
        start_last4_time = Profiling.start()
        def last4char(
                x
        ):
            x_str = str(x)
            # Values shorter than 8 characters reveal only their last character,
            # so that most of a short value is never exposed
            n_reveal = 4 if len(x_str) >= 8 else 1
            return '***' + x_str[len(x_str) - n_reveal:]
        df[colname_last4char] = df[colname_clean].apply(last4char)
        Log.important(
            str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
            + ': Step ' + str(step) + ': EXTRACT LAST 4 CHAR Took '
            + str(Profiling.get_time_dif_secs(start=start_last4_time, stop=Profiling.stop(), decimals=2))
            + ' secs. Successfully extracted last 4 chars from column "' + str(hide_colname)
            + '"'
        )

        #
        # Hash the column
        #
        step += 1
        start_hash_time = Profiling.start()
        def hash_sha256(
                x
        ):
            return Hash.hash(
                string = x,
                algo   = Hash.ALGO_SHA256
            )

        df[colname_hash] = df[colname_clean].apply(hash_sha256)
        Log.important(
            str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
            + ': Step ' + str(step) + ': HASH Took '
            + str(Profiling.get_time_dif_secs(start=start_hash_time, stop=Profiling.stop(), decimals=2))
            + ' secs. Successfully obfuscated column "' + str(hide_colname)
            + '", sample rows: ' + str(df[0:2])
        )

        #
        # Obfuscate Hash hexdigest to Chinese/etc characters
        #
        step += 1
        start_obflang_time = Profiling.start()
        def obfuscate_hash_to_lang(
                x,
                lang
        ):
            unicode_range = Hash.BLOCK_CHINESE
            if lang == 'ko':
                unicode_range = Hash.BLOCK_KOREAN_SYL
            # The input is a hex digest, so 4 characters are packed into 1
            return Hash.convert_ascii_string_to_other_alphabet(
                ascii_char_string = x,
                unicode_range     = unicode_range,
                group_n_char      = 4
            )

        df[colname_hash_readable] = df[colname_hash].apply(obfuscate_hash_to_lang, args=[hash_encode_lang])
        Log.important(
            str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
            + ': Step ' + str(step) + ': HASH TO CHAR Took '
            + str(Profiling.get_time_dif_secs(start=start_obflang_time, stop=Profiling.stop(), decimals=2))
            + ' secs. Successfully converted obfuscation to language for column "' + str(hide_colname)
            + '"'
        )

        #
        # Encryption
        #
        step += 1
        start_enc_time = Profiling.start()
        try:
            key_bytes = b64decode(encrypt_key_b64.encode('utf-8'))
        except Exception as ex_key_conversion:
            # The key itself is deliberately left out of the message
            raise Exception(
                str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                + ': Error converting base64 key to bytes. Exception: '
                + str(ex_key_conversion)
            ) from ex_key_conversion

        # No nonce is a normal case (the default), only a bad nonce is worth a warning
        nonce_bytes = None
        if nonce_b64 is not None:
            try:
                nonce_bytes = b64decode(nonce_b64.encode(encoding='utf-8'))
            except Exception as ex_nonce:
                Log.warning(
                    str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                    + ': Error converting base64 nonce "' + str(nonce_b64)
                    + '" to bytes. Exception: ' + str(ex_nonce)
                )
                nonce_bytes = None

        # Only the key length is logged, never the key bytes
        Log.important(
            str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
            + ': Step ' + str(step) + ': ENCRYPTION using key of len = ' + str(len(key_bytes))
        )
        encryptor = AES_Encrypt(
            key   = key_bytes,
            mode  = AES_Encrypt.AES_MODE_CBC,
            nonce = nonce_bytes
        )
        def encrypt(
                x,
                encryptor
        ):
            try:
                res = encryptor.encode(x.encode(encoding='utf-8'))
                return {
                    'ciphermode': res.cipher_mode,
                    'ciphertext_b64': res.ciphertext_b64,
                    'tag_b64': res.tag_b64,
                    'iv_b64': res.nonce_b64
                }
            except Exception as ex:
                # Best effort: a value that cannot be encrypted gets None
                Log.error(
                    str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno)
                    + ': Error encrypting "' + str(x) + '": ' + str(ex)
                )
                return None

        df[colname_encrypt] = df[colname_clean].apply(encrypt, args=[encryptor])
        Log.important(
            str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
            + ': Step ' + str(step) + ': ENCRYPTION Took '
            + str(Profiling.get_time_dif_secs(start=start_enc_time, stop=Profiling.stop(), decimals=2))
            + ' secs. Successfully encrypted column "' + str(hide_colname)
            + '", for records (first 20 rows): ' + str(df.values[0:min(20,df.shape[0])])
        )

        # NOTE: a "<col>_encrypt_readable" column (ciphertext converted to
        # Chinese/Korean characters) is not produced.

        df_json_str = df.to_json(
            # Make sure not ASCII
            force_ascii = False,
            orient      = 'records',
        )

        return df_json_str