def test_encapsulate(self):
    """Round-trip every entry of TEST_DATA_LIST through the encapsulation helpers."""
    for sample in TEST_DATA_LIST:
        # Whole-string encapsulation must decompress back to the sample.
        wrapped = _encapsulate(sample)
        self.assertEqual(sample, decompress(wrapped))

        # The list variant is only exercised for non-empty samples.
        if not sample:
            continue
        # noinspection PyTypeChecker
        wrapped_pieces = _encapsulate_list(list(sample))
        self.assertEqual(sample, decompress("".join(wrapped_pieces)))
def cycle(
    self, input_str, quiet=False, compress_tree=None, decompress_table=None, show_input_and_output=True, strict=True
):
    """
    Exercise a complete compress -> decompress cycle and assert it is lossless.

    ``input_str`` is compressed with ``compress`` (with and without
    back-tracking) and with ``compress_classic``; every result is passed back
    through ``decompress`` and must equal ``input_str``.

    Unless ``quiet`` is true or ``input_str`` is empty, a report comparing the
    compressed sizes against zlib (level 9) and bz2 is printed, and the
    back-tracked output is asserted to be no longer than the plain output.

    :param input_str: text to round-trip
    :param quiet: suppress the report (and the size comparison inside it)
    :param compress_tree: forwarded to ``compress`` as ``compression_tree``
    :param decompress_table: forwarded to ``decompress`` as ``decompress_table``
    :param show_input_and_output: also print the decompressed / compressed text
    :param strict: forwarded to ``compress`` as ``check_ascii``
    """
    compressed_text = compress(input_str, compression_tree=compress_tree, backtracking=False, check_ascii=strict)
    backtracked_compressed_text = compress(
        input_str, compression_tree=compress_tree, backtracking=True, check_ascii=strict
    )
    decompressed_text = decompress(compressed_text, decompress_table=decompress_table)
    backtracked_decompressed_text = decompress(backtracked_compressed_text, decompress_table=decompress_table)
    classic_compresssed_text = compress_classic(input_str)
    classic_decompressed_test = decompress(classic_compresssed_text)

    if not quiet and input_str:
        input_len = len(input_str)
        backtracking_differs = backtracked_compressed_text != compressed_text

        def size_ratio(payload):
            # Compressed size expressed as a fraction of the input size.
            return 1.0 / (float(input_len) / float(len(payload)))

        print("---------------------------------------------------------------------")
        if show_input_and_output:
            print(decompressed_text)
            print(compressed_text)
            if backtracking_differs:
                print("--back tracked:--")
                print(backtracked_compressed_text)

        ratio = size_ratio(compressed_text)
        b_ratio = size_ratio(backtracked_compressed_text)
        c_ratio = size_ratio(classic_compresssed_text)

        # bz2 / zlib only accept bytes; text strings are encoded first so the
        # comparison also works when input_str is not already a byte string.
        raw_input = input_str if isinstance(input_str, bytes) else input_str.encode("utf-8")
        bz2c = bz2.compress(raw_input)
        zlibc = zlib.compress(raw_input, 9)
        bz2ratio = size_ratio(bz2c)
        zlibratio = size_ratio(zlibc)

        if backtracking_differs:
            print(
                "backtracked compression ratio 1:%f (%.2f%%) from %d bytes to %d bytes"
                % (b_ratio, b_ratio * 100.0, input_len, len(backtracked_compressed_text))
            )
            # The message reports the two sizes actually being compared.
            self.assertTrue(
                len(compressed_text) >= len(backtracked_compressed_text),
                "Back-tracking (%d) should always be better than not-backtracking (%d)"
                % (len(backtracked_compressed_text), len(compressed_text)),
            )
        print(
            "compression ratio 1:%f (%.2f%%) from %d bytes to %d bytes"
            % (ratio, ratio * 100.0, input_len, len(compressed_text))
        )
        print(" vs ")
        print(" zlib ratio 1:%f (%.2f%%) to %d bytes" % (zlibratio, zlibratio * 100.0, len(zlibc)))
        print(" bz2 ratio 1:%f (%.2f%%) to %d bytes" % (bz2ratio, bz2ratio * 100.0, len(bz2c)))
        print(
            " smaz classic 1:%f (%.2f%%) to %d bytes"
            % (c_ratio, c_ratio * 100.0, len(classic_compresssed_text))
        )

    # The round trip must be lossless for every compression variant.
    self.assertEqual(input_str, decompressed_text)
    self.assertEqual(input_str, backtracked_decompressed_text)
    self.assertEqual(input_str, classic_decompressed_test)
def test_specific_bad_data(self):
    """ A few implicit error/edge cases in the SMAZ algorithm """
    overflow = chr(255) + chr(255)  # Buffer overflow - expects 254 bytes, gets 0
    non_ascii_multi = chr(255) + chr(1) + chr(200) + chr(200)  # Non ascii multi-byte payload
    non_ascii_single = chr(254) + chr(129)  # Non ascii single-byte payload

    self.assertFalse(_check_ascii(non_ascii_multi))

    # Truncated payload: None when errors are suppressed, ValueError otherwise.
    self.assertEqual(decompress(overflow, raise_on_error=False), None)
    self.assertRaises(ValueError, decompress, overflow, raise_on_error=True)

    # Non-ascii payloads come back verbatim by default and are only rejected
    # when check_ascii is requested.
    non_ascii_cases = (
        (non_ascii_multi, chr(200) + chr(200)),
        (non_ascii_single, chr(129)),
    )
    for encoded, verbatim in non_ascii_cases:
        self.assertEqual(decompress(encoded), verbatim)  # Returns non-ascii data
        self.assertRaises(ValueError, decompress, encoded, raise_on_error=True, check_ascii=True)
        self.assertEqual(decompress(encoded, raise_on_error=False, check_ascii=True), None)
def cycle(self, input_str, quiet=False, compress_tree=None, decompress_table=None, show_input_and_output=True,
          strict=True):
    """
    Exercise a complete compress -> decompress cycle and assert it is lossless.

    ``input_str`` is compressed with ``compress`` (with and without
    back-tracking) and with ``compress_classic``; every result is passed back
    through ``decompress`` and must equal ``input_str``.

    Unless ``quiet`` is true or ``input_str`` is empty, a report comparing the
    compressed sizes against zlib (level 9) and bz2 is printed, and the
    back-tracked output is asserted to be no longer than the plain output.

    :param input_str: text to round-trip
    :param quiet: suppress the report (and the size comparison inside it)
    :param compress_tree: forwarded to ``compress`` as ``compression_tree``
    :param decompress_table: forwarded to ``decompress`` as ``decompress_table``
    :param show_input_and_output: also print the decompressed / compressed text
    :param strict: forwarded to ``compress`` as ``check_ascii``
    """
    compressed_text = compress(input_str, compression_tree=compress_tree, backtracking=False, check_ascii=strict)
    backtracked_compressed_text = compress(input_str, compression_tree=compress_tree, backtracking=True,
                                           check_ascii=strict)
    decompressed_text = decompress(compressed_text, decompress_table=decompress_table)
    backtracked_decompressed_text = decompress(backtracked_compressed_text, decompress_table=decompress_table)
    classic_compresssed_text = compress_classic(input_str)
    classic_decompressed_test = decompress(classic_compresssed_text)

    if not quiet and input_str:
        input_len = len(input_str)
        backtracking_differs = backtracked_compressed_text != compressed_text

        def size_ratio(payload):
            # Compressed size expressed as a fraction of the input size.
            return 1.0 / (float(input_len) / float(len(payload)))

        print('---------------------------------------------------------------------')
        if show_input_and_output:
            print(decompressed_text)
            print(compressed_text)
            if backtracking_differs:
                print('--back tracked:--')
                print(backtracked_compressed_text)

        ratio = size_ratio(compressed_text)
        b_ratio = size_ratio(backtracked_compressed_text)
        c_ratio = size_ratio(classic_compresssed_text)

        # bz2 / zlib reject text strings on Python 3, so encode before
        # handing the input to them; byte strings pass through untouched.
        raw_input = input_str if isinstance(input_str, bytes) else input_str.encode('utf-8')
        bz2c = bz2.compress(raw_input)
        zlibc = zlib.compress(raw_input, 9)
        bz2ratio = size_ratio(bz2c)
        zlibratio = size_ratio(zlibc)

        if backtracking_differs:
            print('backtracked compression ratio 1:%f (%.2f%%) from %d bytes to %d bytes'
                  % (b_ratio, b_ratio * 100., input_len, len(backtracked_compressed_text)))
            # The message reports the two sizes actually being compared.
            self.assertTrue(len(compressed_text) >= len(backtracked_compressed_text),
                            'Back-tracking (%d) should always be better than not-backtracking (%d)'
                            % (len(backtracked_compressed_text), len(compressed_text)))
        print('compression ratio 1:%f (%.2f%%) from %d bytes to %d bytes'
              % (ratio, ratio * 100., input_len, len(compressed_text)))
        print(' vs \n')
        print(' zlib ratio 1:%f (%.2f%%) to %d bytes' % (zlibratio, zlibratio * 100., len(zlibc)))
        print(' bz2 ratio 1:%f (%.2f%%) to %d bytes' % (bz2ratio, bz2ratio * 100., len(bz2c)))
        print(' smaz classic 1:%f (%.2f%%) to %d bytes'
              % (c_ratio, c_ratio * 100., len(classic_compresssed_text)))

    # The round trip must be lossless for every compression variant.
    self.assertEqual(input_str, decompressed_text)
    self.assertEqual(input_str, backtracked_decompressed_text)
    self.assertEqual(input_str, classic_decompressed_test)
def decrypt(text_to_decrypt, encryption_base):
    """Decode a base-``encryption_base`` cipher string back into text.

    The cipher is read as a big-endian number whose digit alphabet is the
    ``encryption_base`` consecutive characters starting at ``"0"`` (code
    point 48).  The decimal representation of that number, left-padded with
    zeros to a multiple of three digits, is cut into three-digit groups and
    each group becomes one byte of the compressed payload.  The payload is
    decompressed with ``smaz.decompress`` first; if that raises, it is
    decompressed with ``zlib`` and decoded as UTF-8 instead.

    Args:
        text_to_decrypt: the cipher string.
        encryption_base: number of distinct digit characters in the alphabet.

    Returns:
        Whatever ``smaz.decompress`` returns, or the zlib-decompressed text.

    Raises:
        ValueError: a cipher character is outside the digit alphabet, or a
            three-digit group is larger than 255.
        zlib.error: both decompressors rejected the payload.
    """
    # Digit character -> numeric value, so each lookup is O(1).
    digit_values = {chr(48 + value): value for value in range(encryption_base)}

    # Horner evaluation of the positional number.
    num = 0
    for char in text_to_decrypt:
        try:
            digit = digit_values[char]
        except KeyError:
            raise ValueError(
                f"{char!r} is not a valid base-{encryption_base} digit"
            ) from None
        num = num * len(digit_values) + digit

    # Peel off three decimal digits at a time (least significant first).
    # Working arithmetically avoids str(num), which newer Python versions
    # refuse for integers longer than the int/str conversion digit limit.
    groups = []
    while num:
        num, group = divmod(num, 1000)
        groups.append(group)
    groups.reverse()
    if not groups:
        groups = [0]  # a zero-valued cipher still yields one "000" group

    payload = bytes(groups)
    try:
        texto = smaz.decompress(payload)
    except Exception:
        # Not smaz data (or smaz failed): fall back to zlib.
        texto = zlib.decompress(payload).decode("utf-8")
    return texto
def decompress(self, inp):
    """Return ``inp`` as decompressed by ``smaz.decompress``."""
    restored = smaz.decompress(inp)
    return restored
def test_random_invalid_input(self):
    """ Test we don't go off the rails with a large random input """
    # range(256) so that byte value 255 is part of the pool as well;
    # range(255) stopped at 254 and never generated it.
    allbytes = [chr(i) for i in range(256)]
    randominput = "".join(random.choice(allbytes) for _ in range(10000))  # 10kb of random bytes
    # Slide a 4k window over the first part of the random data; errors are
    # suppressed, so the only requirement is that decompress() returns.
    for start in range(2048):
        decompress(randominput[start:start + 4096], raise_on_error=False)
def test_worstsize(self):
    """Check _worst_size() against the encapsulated length of '@' runs of 0..4999 characters."""
    for length in range(0, 5000):
        sample = '@' * length
        self.assertEqual(len(_encapsulate(sample)), _worst_size(len(sample)))

        # Sanity checks: both encapsulation helpers must round-trip.
        self.assertEqual(sample, decompress(_encapsulate(sample)))
        joined_pieces = "".join(_encapsulate_list(list(sample)))
        self.assertEqual(sample, decompress(joined_pieces))
def process_message(self, peer, mailfrom, rcpttos, data): """ peer is a tuple containing (ipaddr, port) of the client that made the socket connection to our smtp port. mailfrom is the raw address the client claims the message is coming from. rcpttos is a list of raw addresses the client wishes to deliver the message to. data is a string containing the entire full text of the message, headers (if supplied) and all. It has been `de-transparencied' according to RFC 821, Section 4.5.2. In other words, a line containing a `.' followed by other text has had the leading dot removed. This function should return None, for a normal `250 Ok' response; otherwise it returns the desired response string in RFC 821 format. """ print peer, mailfrom, rcpttos, len(data) user_email = mailfrom.lower().strip() # Extract reply text from message message = get_reply_text(data) if not message: return None # Can't parse reply text item_id = rcpttos[0].split('@')[0] post_id = user_id = group_id = None if item_id.startswith('post'): post_id = item_id[4:] elif item_id.startswith('user'): user_id = item_id[4:] elif item_id.startswith('group'): group_id = item_id[5:] else: return None if post_id: post_id = post_id.replace('-', '/') while True: try: post_id = smaz.decompress(base64.b64decode(post_id)) break except TypeError: # Incorrect padding post_id = post_id + '=' post_id, db_name = post_id.split('-') if not post_id.isdigit(): return None post_id = int(post_id) user_id = api.get_user_id_from_email_address(user_email, db_name=db_name) if not user_id: return None session_id = api.get_session_id(user_id, db_name=db_name) if not session_id: return None api.new_comment(session_id, message, post_id, db_name=db_name) return None else: return None
def test_random_invalid_input(self):
    """ Test we don't go off the rails with a large random input """
    # xrange(256) so that byte value 255 is part of the pool as well;
    # xrange(255) stopped at 254 and never generated it.
    allbytes = [chr(i) for i in xrange(256)]
    randominput = "".join(random.choice(allbytes) for _ in xrange(10000))  # 10kb of random bytes
    # Slide a 4k window over the first part of the random data; errors are
    # suppressed, so the only requirement is that decompress() returns.
    for start in xrange(2048):
        decompress(randominput[start : start + 4096], raise_on_error=False)
def test_worstsize(self):
    """Check _worst_size() against the encapsulated length of "@" runs of 0..4999 characters."""
    for length in xrange(0, 5000):
        sample = "@" * length
        self.assertEqual(len(_encapsulate(sample)), _worst_size(len(sample)))

        # Sanity checks: both encapsulation helpers must round-trip.
        self.assertEqual(sample, decompress(_encapsulate(sample)))
        joined_pieces = "".join(_encapsulate_list(list(sample)))
        self.assertEqual(sample, decompress(joined_pieces))