示例#1
0
    def test_data_equivalence(self, original, threads, use_dict):
        """Round-trip a batch of inputs through the multi-buffer APIs.

        The inputs are compressed with ``multi_compress_to_buffer`` and then
        decompressed with ``multi_decompress_to_buffer`` twice: once from the
        buffer collection returned by the compressor and once from a plain
        list of ``bytes`` frames. Both results must equal the inputs.

        Args:
            original: sequence of bytes-like inputs; ``original[0]`` doubles
                as the dictionary content when ``use_dict`` is true.
            threads: thread count forwarded to the decompression calls.
            use_dict: when true, compress and decompress with a dictionary
                built from the first input.
        """
        kwargs = {}
        if use_dict:
            kwargs['dict_data'] = zstd.ZstdCompressionDict(original[0])

        cctx = zstd.ZstdCompressor(level=1,
                                   write_content_size=True,
                                   write_checksum=True,
                                   **kwargs)

        # Compression is only the setup step here, so its thread count stays
        # fixed at -1.
        frames_buffer = cctx.multi_compress_to_buffer(original, threads=-1)
        frames_list = [f.tobytes() for f in frames_buffer]

        dctx = zstd.ZstdDecompressor(**kwargs)

        # Exercise both accepted input shapes with the requested thread count;
        # previously ``threads`` was accepted but silently ignored.
        for frames in (frames_buffer, frames_list):
            result = dctx.multi_decompress_to_buffer(frames, threads=threads)

            self.assertEqual(len(result), len(original))
            for i, frame in enumerate(result):
                self.assertEqual(frame.tobytes(), original[i])
示例#2
0
def generate_zstd_dictionary_from_folder_path_and_file_matching_string_func(
        path_to_folder_containing_files_for_dictionary, file_matching_string):
    """Build a zstd compression dictionary from all files matching a glob.

    The glob pattern is the plain concatenation of the two arguments; no path
    separator is inserted between them.

    Args:
        path_to_folder_containing_files_for_dictionary: prefix of the glob
            pattern (the folder path).
        file_matching_string: suffix of the glob pattern, e.g. '*ticket*.html'.

    Returns:
        A ``zstd.ZstdCompressionDict`` wrapping the contents of every matching
        file, concatenated in the order ``glob.glob`` returned them.

    Raises:
        ValueError: if no file matches the pattern.
    """
    print('Now generating compression dictionary for ZSTD:')
    list_of_matching_input_file_paths = glob.glob(
        path_to_folder_containing_files_for_dictionary + file_matching_string)
    if not list_of_matching_input_file_paths:
        # Without this guard the function used to fail further down with an
        # UnboundLocalError that gave no hint about the real problem.
        raise ValueError(
            'No input files match ' +
            repr(path_to_folder_containing_files_for_dictionary +
                 file_matching_string) +
            '; cannot generate a compression dictionary.')
    number_of_input_files = len(list_of_matching_input_file_paths)
    file_contents = []
    for cnt, current_input_file_path in enumerate(
            list_of_matching_input_file_paths, start=1):
        print('Now loading input file ' + str(cnt) + ' out of ' +
              str(number_of_input_files))
        with open(current_input_file_path, 'rb') as f:
            file_contents.append(f.read())
    # Join once instead of re-copying the accumulated bytes for every file.
    combined_dictionary_input_data = b''.join(file_contents)
    animecoin_zstd_compression_dictionary = zstd.ZstdCompressionDict(
        combined_dictionary_input_data)
    print('Done generating compression dictionary!')
    return animecoin_zstd_compression_dictionary
def get_corresponding_zstd_compression_dictionary_func(
        sha256_hash_of_corresponding_zstd_compression_dictionary):
    """Return the compression dictionary stored under the given hash.

    Dictionary files are the ones matched by the module-level
    ``path_to_zstd_compression_dictionaries`` followed by '*.dict'; a file
    matches when its base name with '.dict' removed equals the requested hash.

    If no such file exists, a message is printed and a dictionary is generated
    from the '*ticket*.html' files under
    ``path_where_animecoin_html_ticket_templates_are_stored``. The generated
    dictionary is not checked against the requested hash here.

    Args:
        sha256_hash_of_corresponding_zstd_compression_dictionary: hash string
            identifying the wanted dictionary file.

    Returns:
        A ``zstd.ZstdCompressionDict``, either loaded from disk or generated.
    """
    wanted_hash = sha256_hash_of_corresponding_zstd_compression_dictionary
    dictionary_paths = glob.glob(
        path_to_zstd_compression_dictionaries + '*.dict')

    if not dictionary_paths:
        print(
            'Error! No dictionary files were found. Try generating one or downloading one from the blockchain.'
        )
    else:
        known_hashes = [
            os.path.split(path)[-1].replace('.dict', '')
            for path in dictionary_paths
        ]
        if wanted_hash in known_hashes:
            # Both lists are index-aligned, so the hash position selects the
            # file to load.
            matching_path = dictionary_paths[known_hashes.index(wanted_hash)]
            with open(matching_path, 'rb') as dictionary_file:
                return zstd.ZstdCompressionDict(dictionary_file.read())
        print(
            'Error! Cannot find the compression dictionary file used! Try downloading from the blockchain the dictionary with file hash: '
            + wanted_hash)

    # Reached only when no stored dictionary matched.
    print(
        'Attempting to generate dictionary file before giving up (not guaranteed to work!)'
    )
    generated_dictionary = generate_zstd_dictionary_from_folder_path_and_file_matching_string_func(
        path_where_animecoin_html_ticket_templates_are_stored,
        '*ticket*.html')
    # The raw bytes are fetched but not read again in this function.
    generated_dictionary_raw_data = generated_dictionary.as_bytes()
    return generated_dictionary
示例#4
0
    def test_simple(self):
        """Decompress content-dictionary chains of every length.

        The first chunk is compressed on its own; each later chunk is
        compressed using the previous original input as its dictionary.
        Decompressing the first ``n`` chunks as a chain must yield the
        ``n``-th original input.
        """
        original = [
            b'foo' * 64,
            b'foobar' * 64,
            b'baz' * 64,
            b'foobaz' * 64,
            b'foobarbaz' * 64,
        ]

        chunks = []
        chunks.append(
            zstd.ZstdCompressor(write_content_size=True).compress(original[0]))
        # enumerate() starts at 0 while the slice starts at 1, so original[i]
        # is the input immediately preceding ``chunk``.
        for i, chunk in enumerate(original[1:]):
            d = zstd.ZstdCompressionDict(original[i])
            cctx = zstd.ZstdCompressor(dict_data=d, write_content_size=True)
            chunks.append(cctx.compress(chunk))

        # The upper bound is len(original) + 1 so the complete chain is
        # checked as well; the previous bound stopped one frame short.
        for chain_length in range(1, len(original) + 1):
            chain = chunks[0:chain_length]
            expected = original[chain_length - 1]
            dctx = zstd.ZstdDecompressor()
            decompressed = dctx.decompress_content_dict_chain(chain)
            self.assertEqual(decompressed, expected)
示例#5
0
    def test_data_equivalence(self, original, threads, use_dict):
        """Check that batch compression output decompresses to the inputs.

        Args:
            original: sequence of bytes-like inputs; ``original[0]`` doubles
                as the dictionary content when ``use_dict`` is true.
            threads: thread count forwarded to ``multi_compress_to_buffer``.
            use_dict: when true, compress and decompress with a dictionary
                built from the first input.
        """
        kwargs = {}

        # Use a content dictionary because it is cheap to create.
        if use_dict:
            kwargs['dict_data'] = zstd.ZstdCompressionDict(original[0])

        cctx = zstd.ZstdCompressor(level=1,
                                   write_content_size=True,
                                   write_checksum=True,
                                   **kwargs)

        # Forward the requested thread count; previously ``threads`` was
        # accepted but ignored in favour of a hard-coded -1.
        result = cctx.multi_compress_to_buffer(original, threads=threads)

        self.assertEqual(len(result), len(original))

        # The frame produced via the batch APIs may not be bit identical to that
        # produced by compress() because compression parameters are adjusted
        # from the first input in batch mode. So the only thing we can do is
        # verify the decompressed data matches the input.
        dctx = zstd.ZstdDecompressor(**kwargs)

        for i, frame in enumerate(result):
            self.assertEqual(dctx.decompress(frame), original[i])
示例#6
0
    def test_bad_subsequent_input(self):
        """Reject invalid second chunks in a content-dictionary chain.

        A valid first frame is followed in turn by each kind of bad second
        chunk, and the expected exception type and message are checked.

        NOTE(review): ``assertRaisesRegexp`` is the deprecated spelling of
        ``assertRaisesRegex`` (removed in Python 3.12); it is kept because the
        Python versions this suite must support are not visible here.
        """
        first_frame = zstd.ZstdCompressor(write_content_size=True).compress(
            b'foo' * 64)
        # A well-formed frame that omits the content size from its header.
        frame_without_size = zstd.ZstdCompressor().compress(b'foo' * 64)

        dctx = zstd.ZstdDecompressor()

        # (second chunk, expected ValueError message), checked in this order.
        rejected_second_chunks = (
            (u'foo', 'chunk 1 must be bytes'),
            (None, 'chunk 1 must be bytes'),
            (zstd.FRAME_HEADER,
             'chunk 1 is too small to contain a zstd frame'),
            (b'foo' * 8, 'chunk 1 is not a valid zstd frame'),
            (frame_without_size, 'chunk 1 missing content size in frame'),
        )
        for second_chunk, expected_message in rejected_second_chunks:
            with self.assertRaisesRegexp(ValueError, expected_message):
                dctx.decompress_content_dict_chain(
                    [first_frame, second_chunk])

        # Corrupt second frame: build a valid dictionary-compressed frame,
        # then cut three bytes (offsets 12-14) out of it.
        content_dict = zstd.ZstdCompressionDict(b'foo' * 64)
        cctx = zstd.ZstdCompressor(write_content_size=True,
                                   dict_data=content_dict)
        intact_frame = cctx.compress(b'bar' * 64)
        corrupted_frame = intact_frame[:12] + intact_frame[15:]

        with self.assertRaisesRegexp(zstd.ZstdError,
                                     'could not decompress chunk 1'):
            dctx.decompress_content_dict_chain([first_frame, corrupted_frame])
def retrieve_data_from_animecoin_blockchain_func(blockchain_transaction_id):
    """Rebuild and decompress the data embedded in a blockchain transaction.

    The raw transaction returned by the RPC call ``getrawtransaction`` is split
    on the marker '0100000000000000'. Fixed-offset hex slices of the pieces
    are concatenated and hex-decoded into a UTF-8 string laid out as:

    * 30 characters: hex encoding of the zero-padded, 15-digit decimal length
      of the hex-encoded compressed data;
    * the hash of the compression dictionary;
    * the hash of the uncompressed data;
    * the hash of the compressed data;
    * the hex-encoded compressed data, followed by padding.

    Each hash field is as long as the value returned by
    ``get_sha256_hash_of_input_data_func('test')``.

    Args:
        blockchain_transaction_id: transaction id passed to
            ``getrawtransaction``.

    Returns:
        The value returned by ``decompress_data_with_animecoin_zstd_func`` for
        the reconstructed compressed data and its dictionary.

    Raises:
        AssertionError: if the hash of the compressed data, of the compression
            dictionary or of the uncompressed data does not match the hash
            stored in the transaction.
    """
    rpc_connection = AuthServiceProxy(rpc_connection_string)
    raw = rpc_connection.getrawtransaction(blockchain_transaction_id)
    outputs = raw.split('0100000000000000')

    # Every piece except the first and the last two carries three 65-byte
    # parts (130 hex characters each) starting at offsets 6, 138 and 270.
    hex_parts = []
    for output in outputs[1:-2]:
        for start in (6, 138, 270):
            hex_parts.append(output[start:start + 130])
    hex_parts.append(outputs[-2][6:-4])
    encoded_hex_data = ''.join(hex_parts)

    reconstructed_combined_data = binascii.a2b_hex(encoded_hex_data).decode(
        'utf-8')

    # len(hexlify('{0:015}'.format(len(encoded_data)).encode('utf-8'))) is 30.
    # int() accepts leading zeros, so no stripping is needed; stripping them
    # used to turn an all-zero field into '' and make int() fail.
    length_of_encoded_compressed_data = int(
        unhexstr(reconstructed_combined_data[0:30]).decode('utf-8'))
    remainder = reconstructed_combined_data[30:]

    length_of_standard_hash_string = len(
        get_sha256_hash_of_input_data_func('test'))
    first_hash_end = length_of_standard_hash_string
    second_hash_end = 2 * length_of_standard_hash_string
    third_hash_end = 3 * length_of_standard_hash_string
    reconstructed_compression_dictionary_file_hash = remainder[:first_hash_end]
    reconstructed_uncompressed_data_file_hash = remainder[
        first_hash_end:second_hash_end]
    reconstructed_compressed_data_file_hash = remainder[
        second_hash_end:third_hash_end]

    # NOTE: stray upper-case 'A' characters show up in the data; their origin
    # is unknown, so they are simply dropped (lower-case 'a' is kept).
    encoded_compressed_data_padded = remainder[third_hash_end:].replace(
        'A', '')
    # Keep exactly the declared number of characters. Slicing with a negative
    # padding length, as before, returned '' whenever the padding was 0.
    encoded_compressed_data = encoded_compressed_data_padded[
        :length_of_encoded_compressed_data]
    reconstructed_animecoin_zstd_compressed_data = unhexstr(
        encoded_compressed_data)

    # The integrity checks raise explicitly so they still run under
    # ``python -O``; AssertionError is kept for callers that rely on it.
    if get_sha256_hash_of_input_data_func(
            reconstructed_animecoin_zstd_compressed_data
    ) != reconstructed_compressed_data_file_hash:
        raise AssertionError(
            'Hash of reconstructed compressed data does not match the hash '
            'stored in the transaction.')

    # Shared lookup: load the dictionary file named after the hash, or fall
    # back to generating one from the ticket templates.
    reconstructed_animecoin_zstd_compression_dictionary = get_corresponding_zstd_compression_dictionary_func(
        reconstructed_compression_dictionary_file_hash)
    reconstructed_animecoin_zstd_compression_dictionary_raw_data = reconstructed_animecoin_zstd_compression_dictionary.as_bytes(
    )
    if get_sha256_hash_of_input_data_func(
            reconstructed_animecoin_zstd_compression_dictionary_raw_data
    ) != reconstructed_compression_dictionary_file_hash:
        raise AssertionError(
            'Hash of the compression dictionary does not match the hash '
            'stored in the transaction.')

    reconstructed_animecoin_zstd_uncompressed_data = decompress_data_with_animecoin_zstd_func(
        reconstructed_animecoin_zstd_compressed_data,
        reconstructed_animecoin_zstd_compression_dictionary)
    if get_sha256_hash_of_input_data_func(
            reconstructed_animecoin_zstd_uncompressed_data
    ) != reconstructed_uncompressed_data_file_hash:
        raise AssertionError(
            'Hash of the decompressed data does not match the hash stored in '
            'the transaction.')

    print('Successfully reconstructed and decompressed data!')
    return reconstructed_animecoin_zstd_uncompressed_data