def test_data_equivalence(self, original, threads, use_dict):
    """Round-trip *original* through the batch compress/decompress buffer
    APIs and verify every recovered frame matches its source chunk, both
    when feeding the buffer collection directly and when feeding a plain
    list of byte strings."""
    # NOTE(review): the `threads` parameter appears unused — `threads=-1`
    # is always passed below; confirm whether that is intentional.
    extra_args = {}
    if use_dict:
        extra_args['dict_data'] = zstd.ZstdCompressionDict(original[0])

    compressor = zstd.ZstdCompressor(level=1,
                                     write_content_size=True,
                                     write_checksum=True,
                                     **extra_args)
    compressed = compressor.multi_compress_to_buffer(original, threads=-1)

    decompressor = zstd.ZstdDecompressor(**extra_args)

    # First pass: decompress straight from the compressed buffer collection.
    recovered = decompressor.multi_decompress_to_buffer(compressed)
    self.assertEqual(len(recovered), len(original))
    for expected, frame in zip(original, recovered):
        self.assertEqual(frame.tobytes(), expected)

    # Second pass: decompress from a plain list of byte strings instead.
    frame_bytes = [segment.tobytes() for segment in compressed]
    recovered = decompressor.multi_decompress_to_buffer(frame_bytes)
    self.assertEqual(len(recovered), len(original))
    for expected, frame in zip(original, recovered):
        self.assertEqual(frame.tobytes(), expected)
def generate_zstd_dictionary_from_folder_path_and_file_matching_string_func(
        path_to_folder_containing_files_for_dictionary, file_matching_string):
    """Build a zstd compression dictionary from all files in a folder that
    match a glob pattern.

    Args:
        path_to_folder_containing_files_for_dictionary: Folder path prefix;
            the glob pattern is appended directly, so callers are expected
            to include a trailing path separator.
        file_matching_string: Glob pattern (e.g. '*ticket*.html').

    Returns:
        A zstd.ZstdCompressionDict built from the concatenated raw contents
        of every matching file.

    Raises:
        ValueError: If no files match the pattern. (The previous version
            crashed with a NameError in this case because the accumulator
            was only initialized on the first iteration.)
    """
    print('Now generating compression dictionary for ZSTD:')
    list_of_matching_input_file_paths = glob.glob(
        path_to_folder_containing_files_for_dictionary + file_matching_string)
    if not list_of_matching_input_file_paths:
        raise ValueError(
            'No files matching ' + file_matching_string + ' were found in ' +
            path_to_folder_containing_files_for_dictionary)
    total_number_of_files = len(list_of_matching_input_file_paths)
    # Collect each file's bytes and join once at the end: repeated
    # bytes += concatenation is quadratic in the total input size.
    file_contents = []
    for cnt, current_input_file_path in enumerate(
            list_of_matching_input_file_paths):
        print('Now loading input file ' + str(cnt + 1) + ' out of ' +
              str(total_number_of_files))
        with open(current_input_file_path, 'rb') as f:
            file_contents.append(f.read())
    combined_dictionary_input_data = b''.join(file_contents)
    animecoin_zstd_compression_dictionary = zstd.ZstdCompressionDict(
        combined_dictionary_input_data)
    print('Done generating compression dictionary!')
    return animecoin_zstd_compression_dictionary
def get_corresponding_zstd_compression_dictionary_func(
        sha256_hash_of_corresponding_zstd_compression_dictionary):
    """Load the zstd compression dictionary whose on-disk file name (minus
    the '.dict' suffix) equals the given SHA256 hash.

    Looks in the global ``path_to_zstd_compression_dictionaries`` folder.
    If no matching file exists, falls back to regenerating a dictionary
    from the HTML ticket templates (not guaranteed to produce the same
    dictionary).
    """
    global path_to_zstd_compression_dictionaries
    dictionary_file_paths = glob.glob(
        path_to_zstd_compression_dictionaries + '*.dict')
    compression_dictionary = None
    if not dictionary_file_paths:
        print(
            'Error! No dictionary files were found. Try generating one or downloading one from the blockchain.'
        )
    else:
        # File names are the SHA256 hashes of the dictionaries they hold.
        hashes_on_disk = [
            os.path.split(p)[-1].replace('.dict', '')
            for p in dictionary_file_paths
        ]
        if sha256_hash_of_corresponding_zstd_compression_dictionary in hashes_on_disk:
            matching_index = hashes_on_disk.index(
                sha256_hash_of_corresponding_zstd_compression_dictionary)
            matching_path = dictionary_file_paths[matching_index]
            with open(matching_path, 'rb') as dictionary_file:
                compression_dictionary = zstd.ZstdCompressionDict(
                    dictionary_file.read())
        else:
            print(
                'Error! Cannot find the compression dictionary file used! Try downloading from the blockchain the dictionary with file hash: '
                + sha256_hash_of_corresponding_zstd_compression_dictionary)
    if compression_dictionary is None:
        # Last resort: rebuild from the ticket templates and hope it matches.
        print(
            'Attempting to generate dictionary file before giving up (not guaranteed to work!)'
        )
        compression_dictionary = generate_zstd_dictionary_from_folder_path_and_file_matching_string_func(
            path_where_animecoin_html_ticket_templates_are_stored,
            '*ticket*.html')
        compression_dictionary.as_bytes()
    return compression_dictionary
def test_simple(self):
    """Build a chain of frames where each frame is compressed using the
    previous chunk's plaintext as a content dictionary, then verify that
    decompress_content_dict_chain() recovers each prefix of the chain."""
    original = [
        b'foo' * 64,
        b'foobar' * 64,
        b'baz' * 64,
        b'foobaz' * 64,
        b'foobarbaz' * 64,
    ]

    # First frame stands alone; every later frame uses the preceding
    # plaintext as its dictionary.
    frames = [
        zstd.ZstdCompressor(write_content_size=True).compress(original[0])
    ]
    for prev_index in range(len(original) - 1):
        content_dict = zstd.ZstdCompressionDict(original[prev_index])
        compressor = zstd.ZstdCompressor(dict_data=content_dict,
                                         write_content_size=True)
        frames.append(compressor.compress(original[prev_index + 1]))

    # Each prefix of the chain should decode to the last chunk it covers.
    for chain_length in range(1, len(original)):
        dctx = zstd.ZstdDecompressor()
        decompressed = dctx.decompress_content_dict_chain(
            frames[0:chain_length])
        self.assertEqual(decompressed, original[chain_length - 1])
def test_data_equivalence(self, original, threads, use_dict):
    """Batch-compress *original* and verify each resulting frame
    decompresses back to its source chunk."""
    opts = {}
    # A content dictionary is cheap to create, so use one when requested.
    if use_dict:
        opts['dict_data'] = zstd.ZstdCompressionDict(original[0])

    compressor = zstd.ZstdCompressor(level=1,
                                     write_content_size=True,
                                     write_checksum=True,
                                     **opts)
    frames = compressor.multi_compress_to_buffer(original, threads=-1)
    self.assertEqual(len(frames), len(original))

    # Frames from the batch API may not be bit-identical to compress()
    # output because compression parameters are adjusted from the first
    # input in batch mode — so the only meaningful check is that each
    # frame decompresses back to the matching input.
    decompressor = zstd.ZstdDecompressor(**opts)
    for frame, expected in zip(frames, original):
        self.assertEqual(decompressor.decompress(frame), expected)
def test_bad_subsequent_input(self):
    """Verify decompress_content_dict_chain() rejects each kind of
    malformed second chunk with the expected error."""
    first_frame = zstd.ZstdCompressor(write_content_size=True).compress(
        b'foo' * 64)
    dctx = zstd.ZstdDecompressor()

    # (bad second chunk, expected ValueError message pattern)
    bad_chunks = [
        (u'foo', 'chunk 1 must be bytes'),
        (None, 'chunk 1 must be bytes'),
        (zstd.FRAME_HEADER, 'chunk 1 is too small to contain a zstd frame'),
        (b'foo' * 8, 'chunk 1 is not a valid zstd frame'),
        # Frame without an embedded content size.
        (zstd.ZstdCompressor().compress(b'foo' * 64),
         'chunk 1 missing content size in frame'),
    ]
    for bad_chunk, message in bad_chunks:
        with self.assertRaisesRegexp(ValueError, message):
            dctx.decompress_content_dict_chain([first_frame, bad_chunk])

    # A structurally corrupted second frame raises ZstdError instead.
    compressor = zstd.ZstdCompressor(
        write_content_size=True,
        dict_data=zstd.ZstdCompressionDict(b'foo' * 64))
    corrupt_frame = compressor.compress(b'bar' * 64)
    corrupt_frame = corrupt_frame[0:12] + corrupt_frame[15:]

    with self.assertRaisesRegexp(zstd.ZstdError,
                                 'could not decompress chunk 1'):
        dctx.decompress_content_dict_chain([first_frame, corrupt_frame])
def retrieve_data_from_animecoin_blockchain_func(blockchain_transaction_id):
    """Reconstruct, verify, and decompress data previously embedded in an
    Animecoin blockchain transaction.

    The raw transaction hex is split on a fixed marker, the hex-encoded
    payload is reassembled from the outputs, and the payload header is
    parsed into: compressed-data length, compression-dictionary hash,
    uncompressed-data hash, and compressed-data hash.  Each stage is
    verified against its SHA256 hash before the zstd-compressed body is
    decompressed with the matching dictionary.

    Args:
        blockchain_transaction_id: Transaction id to fetch via RPC.

    Returns:
        The reconstructed uncompressed data.

    Raises:
        AssertionError: If any of the three hash checks fails.
    """
    global rpc_connection_string, path_where_animecoin_html_ticket_templates_are_stored
    rpc_connection = AuthServiceProxy(rpc_connection_string)
    raw = rpc_connection.getrawtransaction(blockchain_transaction_id)
    # '0100000000000000' acts as the delimiter between transaction outputs
    # in the raw hex — TODO confirm this marker is stable across tx formats.
    outputs = raw.split('0100000000000000')
    encoded_hex_data = ''
    for output in outputs[
            1:-2]:  # there are 3 65-byte parts in this that we need
        # Skip a 6-char prefix, then pull three 130-hex-char (65-byte)
        # payload slices separated by 2-char gaps.
        cur = 6
        encoded_hex_data += output[cur:cur + 130]
        cur += 132
        encoded_hex_data += output[cur:cur + 130]
        cur += 132
        encoded_hex_data += output[cur:cur + 130]
    # Final (partial) output carries the tail of the payload.
    encoded_hex_data += outputs[-2][6:-4]
    reconstructed_combined_data = binascii.a2b_hex(encoded_hex_data).decode(
        'utf-8')
    # First 30 chars: hex-encoded, zero-padded decimal length of the
    # compressed data.
    reconstructed_length_of_compressed_data_hex_string = reconstructed_combined_data[
        0:
        30]  # len(hexlify('{0:015}'.format(len(encoded_animecoin_zstd_compressed_data)).encode('utf-8'))) is 30
    reconstructed_length_of_compressed_data_hex_string = int(
        unhexstr(reconstructed_length_of_compressed_data_hex_string).decode(
            'utf-8').lstrip('0'))
    reconstructed_combined_data__remainder_1 = reconstructed_combined_data[30:]
    # Hash fields are fixed-width; derive the width from a sample hash.
    length_of_standard_hash_string = len(
        get_sha256_hash_of_input_data_func('test'))
    # Header layout after the length field: dictionary hash, then
    # uncompressed-data hash, then compressed-data hash, then the payload.
    reconstructed_compression_dictionary_file_hash = reconstructed_combined_data__remainder_1[
        0:length_of_standard_hash_string]
    reconstructed_combined_data__remainder_2 = reconstructed_combined_data__remainder_1[
        length_of_standard_hash_string:]
    reconstructed_uncompressed_data_file_hash = reconstructed_combined_data__remainder_2[
        0:length_of_standard_hash_string]
    reconstructed_combined_data__remainder_3 = reconstructed_combined_data__remainder_2[
        length_of_standard_hash_string:]
    reconstructed_compressed_data_file_hash = reconstructed_combined_data__remainder_3[
        0:length_of_standard_hash_string]
    reconstructed_combined_data__remainder_4 = reconstructed_combined_data__remainder_3[
        length_of_standard_hash_string:]
    reconstructed_encoded_animecoin_zstd_compressed_data_padded = reconstructed_combined_data__remainder_4.replace(
        'A', ''
    )  # Not sure where this comes from; somehow it is introduced into the data (note this is "A" not "a").
    # Strip trailing padding so the payload matches the declared length.
    calculated_padding_length = len(
        reconstructed_encoded_animecoin_zstd_compressed_data_padded
    ) - reconstructed_length_of_compressed_data_hex_string
    reconstructed_encoded_animecoin_zstd_compressed_data = reconstructed_encoded_animecoin_zstd_compressed_data_padded[
        0:-calculated_padding_length]
    reconstructed_animecoin_zstd_compressed_data = unhexstr(
        reconstructed_encoded_animecoin_zstd_compressed_data)
    # Check 1: the compressed payload hashes to the value in the header.
    hash_of_reconstructed_animecoin_zstd_compressed_data = get_sha256_hash_of_input_data_func(
        reconstructed_animecoin_zstd_compressed_data)
    assert (hash_of_reconstructed_animecoin_zstd_compressed_data ==
            reconstructed_compressed_data_file_hash)
    # Locate the compression dictionary whose file name equals its hash.
    list_of_available_compression_dictionary_files = glob.glob(
        path_to_zstd_compression_dictionaries + '*.dict')
    found_dictionary_file = 0
    if len(list_of_available_compression_dictionary_files) == 0:
        print(
            'Error! No dictionary files were found. Try generating one or downloading one from the blockchain.'
        )
    else:
        available_dictionary_file_hashes = [
            os.path.split(x)[-1].replace('.dict', '')
            for x in list_of_available_compression_dictionary_files
        ]
        if reconstructed_compression_dictionary_file_hash not in available_dictionary_file_hashes:
            print(
                'Error! Cannot find the compression dictionary file used! Try downloading from the blockchain the dictionary with file hash: '
                + reconstructed_compression_dictionary_file_hash)
        else:
            found_dictionary_file = 1
            dictionary_file_index_number = available_dictionary_file_hashes.index(
                reconstructed_compression_dictionary_file_hash)
            dictionary_file_path = list_of_available_compression_dictionary_files[
                dictionary_file_index_number]
            with open(dictionary_file_path, 'rb') as f:
                reconstructed_animecoin_zstd_compression_dictionary_raw_data = f.read(
                )
            reconstructed_animecoin_zstd_compression_dictionary = zstd.ZstdCompressionDict(
                reconstructed_animecoin_zstd_compression_dictionary_raw_data)
    if not found_dictionary_file:
        # Fallback: regenerate from templates; only works if the result
        # happens to hash to the expected dictionary hash.
        print(
            'Attempting to generate dictionary file before giving up (not guaranteed to work!)'
        )
        reconstructed_animecoin_zstd_compression_dictionary = generate_zstd_dictionary_from_folder_path_and_file_matching_string_func(
            path_where_animecoin_html_ticket_templates_are_stored,
            '*ticket*.html')
        reconstructed_animecoin_zstd_compression_dictionary_raw_data = reconstructed_animecoin_zstd_compression_dictionary.as_bytes(
        )
    # Check 2: the dictionary matches the hash recorded in the header.
    hash_of_compression_dictionary = get_sha256_hash_of_input_data_func(
        reconstructed_animecoin_zstd_compression_dictionary_raw_data)
    assert (hash_of_compression_dictionary ==
            reconstructed_compression_dictionary_file_hash)
    reconstructed_animecoin_zstd_uncompressed_data = decompress_data_with_animecoin_zstd_func(
        reconstructed_animecoin_zstd_compressed_data,
        reconstructed_animecoin_zstd_compression_dictionary)
    # Check 3: the decompressed output hashes to the recorded value.
    hash_of_reconstructed_animecoin_zstd_uncompressed_data = get_sha256_hash_of_input_data_func(
        reconstructed_animecoin_zstd_uncompressed_data)
    assert (hash_of_reconstructed_animecoin_zstd_uncompressed_data ==
            reconstructed_uncompressed_data_file_hash)
    print('Successfully reconstructed and decompressed data!')
    return reconstructed_animecoin_zstd_uncompressed_data