def _encode_tail_segment(self, segnum):
    start = time.time()
    codec = self._tail_codec
    input_piece_size = codec.get_block_size()

    crypttext_segment_hasher = hashutil.crypttext_segment_hasher()

    d = self._gather_data(self.required_shares, input_piece_size,
                          crypttext_segment_hasher, allow_short=True)
    def _done_gathering(chunks):
        for c in chunks:
            # a short trailing chunk will have been padded by
            # _gather_data
            assert len(c) == input_piece_size
        self._crypttext_hashes.append(crypttext_segment_hasher.digest())
        return codec.encode(chunks)
    d.addCallback(_done_gathering)
    def _done(res):
        elapsed = time.time() - start
        self._times["cumulative_encoding"] += elapsed
        return res
    d.addCallback(_done)
    return d
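
# Hedged sketch (illustration, not from the original source): why the
# assert in _done_gathering holds even for the tail segment.
# _gather_data with allow_short=True is assumed to zero-pad a short
# trailing chunk up to the codec's block size, along these lines:

def pad_to_block_size(chunk, input_piece_size):
    """Zero-pad a short trailing chunk up to the codec block size."""
    assert len(chunk) <= input_piece_size
    return chunk + b"\x00" * (input_piece_size - len(chunk))

assert pad_to_block_size(b"tail", 8) == b"tail\x00\x00\x00\x00"
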
def _encode_segment(self, segnum):
    codec = self._codec
    start = time.time()

    # the ICodecEncoder API wants to receive a total of self.segment_size
    # bytes on each encode() call, broken up into a number of
    # identically-sized pieces. Due to the way the codec algorithm works,
    # these pieces need to be the same size as the share which the codec
    # will generate. Therefore we must feed it with input_piece_size that
    # equals the output share size.
    input_piece_size = codec.get_block_size()

    # as a result, the number of input pieces per encode() call will be
    # equal to the number of required shares with which the codec was
    # constructed. You can think of the codec as chopping up a
    # 'segment_size' of data into 'required_shares' shares (not doing any
    # fancy math at all, just doing a split), then creating some number
    # of additional shares which can be substituted if the primary ones
    # are unavailable.

    # we read data from the source one segment at a time, and then chop
    # it into 'input_piece_size' pieces before handing it to the codec.

    crypttext_segment_hasher = hashutil.crypttext_segment_hasher()

    # memory footprint: we only hold a tiny piece of the plaintext at any
    # given time. We build up a segment's worth of crypttext, then hand
    # it to the encoder. Assuming 3-of-10 encoding (3.3x expansion) and
    # 1MiB max_segment_size, we get a peak memory footprint of 4.3*1MiB =
    # 4.3MiB. Lowering max_segment_size to, say, 100KiB would drop the
    # footprint to 430KiB at the expense of more hash-tree overhead.

    d = self._gather_data(self.required_shares, input_piece_size,
                          crypttext_segment_hasher)
    def _done_gathering(chunks):
        for c in chunks:
            assert len(c) == input_piece_size
        self._crypttext_hashes.append(crypttext_segment_hasher.digest())
        # during this call, we hit 5*segsize memory
        return codec.encode(chunks)
    d.addCallback(_done_gathering)
    def _done(res):
        elapsed = time.time() - start
        self._times["cumulative_encoding"] += elapsed
        return res
    d.addCallback(_done)
    return d
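
# Hedged sketch of the split-then-encode relationship described in the
# comments above, using zfec (the erasure-coding library that
# Tahoe-LAFS's CRSEncoder wraps). The 3-of-10 parameters match the
# memory-footprint comment; input_piece_size and the variable names are
# illustrative, not taken from the original source. Note that the real
# encoder rounds segment_size up to a multiple of required_shares, so
# the split below comes out even.

import zfec

required_shares, total_shares = 3, 10
input_piece_size = 1024                 # stands in for codec.get_block_size()
segment_size = required_shares * input_piece_size

segment = b"\xab" * segment_size
# "not doing any fancy math at all, just doing a split":
pieces = [segment[i * input_piece_size:(i + 1) * input_piece_size]
          for i in range(required_shares)]

encoder = zfec.Encoder(required_shares, total_shares)
blocks = encoder.encode(pieces, list(range(total_shares)))
assert len(blocks) == total_shares
assert all(len(b) == input_piece_size for b in blocks)
# the code is systematic: the first k output blocks are the input pieces,
# and the remaining m-k are the substitutable extra shares
assert list(blocks[:required_shares]) == pieces
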
def test_hashers(self):
    h1 = hashutil.block_hash(b"foo")
    h2 = hashutil.block_hasher()
    h2.update(b"foo")
    self.failUnlessEqual(h1, h2.digest())
    self.assertIsInstance(h1, bytes)

    h1 = hashutil.uri_extension_hash(b"foo")
    h2 = hashutil.uri_extension_hasher()
    h2.update(b"foo")
    self.failUnlessEqual(h1, h2.digest())
    self.assertIsInstance(h1, bytes)

    h1 = hashutil.plaintext_hash(b"foo")
    h2 = hashutil.plaintext_hasher()
    h2.update(b"foo")
    self.failUnlessEqual(h1, h2.digest())
    self.assertIsInstance(h1, bytes)

    h1 = hashutil.crypttext_hash(b"foo")
    h2 = hashutil.crypttext_hasher()
    h2.update(b"foo")
    self.failUnlessEqual(h1, h2.digest())
    self.assertIsInstance(h1, bytes)

    h1 = hashutil.crypttext_segment_hash(b"foo")
    h2 = hashutil.crypttext_segment_hasher()
    h2.update(b"foo")
    self.failUnlessEqual(h1, h2.digest())
    self.assertIsInstance(h1, bytes)

    h1 = hashutil.plaintext_segment_hash(b"foo")
    h2 = hashutil.plaintext_segment_hasher()
    h2.update(b"foo")
    self.failUnlessEqual(h1, h2.digest())
    self.assertIsInstance(h1, bytes)
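
# Hedged sketch of the pattern this test exercises: every one-shot
# hashutil.X_hash(data) is expected to equal feeding the same bytes
# (possibly across several update() calls) into the matching X_hasher()
# and taking digest(). Tahoe's hashutil derives each pair from SHA-256
# keyed by a per-purpose tag; the netstring-style tagging below
# illustrates that scheme but is not a verbatim copy of it.

import hashlib

def example_tagged_hasher(tag):
    h = hashlib.sha256()
    h.update(b"%d:%s," % (len(tag), tag))  # netstring-style tag prefix
    return h

def example_tagged_hash(tag, data):
    h = example_tagged_hasher(tag)
    h.update(data)
    return h.digest()

h1 = example_tagged_hash(b"some_tag", b"foo")
h2 = example_tagged_hasher(b"some_tag")
h2.update(b"f")
h2.update(b"oo")
assert h1 == h2.digest()
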
def _encode_segment(self, segnum, is_tail):
    """
    Encode one segment of input into the configured number of shares.

    :param segnum: Ostensibly, the number of the segment to encode.  In
        reality, this parameter is ignored and the *next* segment is
        encoded and returned.

    :param bool is_tail: ``True`` if this is the last segment, ``False``
        otherwise.

    :return: A ``Deferred`` which fires with a two-tuple.  The first
        element is a list of string-y objects, each representing the
        encoded segment data for one of the shares.  The second element
        is a list of integers giving the share numbers of the shares in
        the first element.
    """
    codec = self._tail_codec if is_tail else self._codec
    start = time.time()

    # the ICodecEncoder API wants to receive a total of self.segment_size
    # bytes on each encode() call, broken up into a number of
    # identically-sized pieces. Due to the way the codec algorithm works,
    # these pieces need to be the same size as the share which the codec
    # will generate. Therefore we must feed it with input_piece_size that
    # equals the output share size.
    input_piece_size = codec.get_block_size()

    # as a result, the number of input pieces per encode() call will be
    # equal to the number of required shares with which the codec was
    # constructed. You can think of the codec as chopping up a
    # 'segment_size' of data into 'required_shares' shares (not doing any
    # fancy math at all, just doing a split), then creating some number
    # of additional shares which can be substituted if the primary ones
    # are unavailable.

    # we read data from the source one segment at a time, and then chop
    # it into 'input_piece_size' pieces before handing it to the codec.

    crypttext_segment_hasher = hashutil.crypttext_segment_hasher()

    # memory footprint: we only hold a tiny piece of the plaintext at any
    # given time. We build up a segment's worth of crypttext, then hand
    # it to the encoder. Assuming 3-of-10 encoding (3.3x expansion) and
    # 1MiB max_segment_size, we get a peak memory footprint of 4.3*1MiB =
    # 4.3MiB. Lowering max_segment_size to, say, 100KiB would drop the
    # footprint to 430KiB at the expense of more hash-tree overhead.

    d = self._gather_data(self.required_shares, input_piece_size,
                          crypttext_segment_hasher, allow_short=is_tail)
    def _done_gathering(chunks):
        for c in chunks:
            # If is_tail then a short trailing chunk will have been
            # padded by _gather_data
            assert len(c) == input_piece_size
        self._crypttext_hashes.append(crypttext_segment_hasher.digest())
        # during this call, we hit 5*segsize memory
        return codec.encode(chunks)
    d.addCallback(_done_gathering)
    def _done(res):
        elapsed = time.time() - start
        self._times["cumulative_encoding"] += elapsed
        return res
    d.addCallback(_done)
    return d
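
# Hedged usage sketch: how a caller might drive the unified method.
# self.num_segments and _send_segment (a consumer of the
# (shares, shareids) two-tuple) are assumed here for illustration; the
# real encoder sequences these calls through its own pipeline.

from twisted.internet import defer

def _encode_and_send_all_segments(self):
    d = defer.succeed(None)
    for segnum in range(self.num_segments):
        is_tail = (segnum == self.num_segments - 1)
        # default args pin the loop variables for the deferred callback
        d.addCallback(lambda ign, n=segnum, t=is_tail:
                      self._encode_segment(n, t))
        d.addCallback(self._send_segment, segnum)
    return d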