def _parse_and_store_UEB(self, UEB_s):
    """Parse the URI Extension Block bytes and record download geometry.

    Extracts segment_size from the UEB, derives all segment/block sizes,
    builds the erasure decoder, and creates the real ciphertext hash
    tree (replacing the earlier guess).
    """
    # Note: the UEB contains needed_shares and total_shares. These are
    # redundant and inferior (the filecap contains the authoritative
    # values). However, because it is possible to encode the same file in
    # multiple ways, and the encoders might choose (poorly) to use the
    # same key for both (therefore getting the same SI), we might
    # encounter shares for both types. The UEB hashes will be different,
    # however, and we'll disregard the "other" encoding's shares as
    # corrupted.

    # therefore, we ignore d['total_shares'] and d['needed_shares'].

    d = uri.unpack_extension(UEB_s)

    log.msg(format="UEB=%(ueb)s, vcap=%(vcap)s",
            ueb=repr(uri.unpack_extension_readable(UEB_s)),
            vcap=self._verifycap.to_string(),
            level=log.NOISY, parent=self._lp, umid="cVqZnA")

    # k/N come from the verifycap, which is authoritative (see note above).
    k, N = self._verifycap.needed_shares, self._verifycap.total_shares

    self.segment_size = d['segment_size']
    # Wake up anyone who was waiting to learn the real segment size.
    self._segsize_observers.fire(self.segment_size)

    r = self._calculate_sizes(self.segment_size)
    self.tail_segment_size = r["tail_segment_size"]
    self.tail_segment_padded = r["tail_segment_padded"]
    self.num_segments = r["num_segments"]
    self.block_size = r["block_size"]
    self.tail_block_size = r["tail_block_size"]
    log.msg("actual sizes: %s" % (r,),
            level=log.NOISY, parent=self._lp, umid="PY6P5Q")
    if (self.segment_size == self.guessed_segment_size
        and self.num_segments == self.guessed_num_segments):
        log.msg("my guess was right!",
                level=log.NOISY, parent=self._lp, umid="x340Ow")
    else:
        log.msg("my guess was wrong! Extra round trips for me.",
                level=log.NOISY, parent=self._lp, umid="tb7RJw")

    # zfec.Decode() instantiation is fast, but still, let's use the same
    # codec instance for all but the last segment. 3-of-10 takes 15us on
    # my laptop, 25-of-100 is 900us, 3-of-255 is 97us, 25-of-255 is
    # 2.5ms, worst-case 254-of-255 is 9.3ms
    self._codec = CRSDecoder()
    self._codec.set_params(self.segment_size, k, N)

    # Ciphertext hash tree root is mandatory, so that there is at most
    # one ciphertext that matches this read-cap or verify-cap. The
    # integrity check on the shares is not sufficient to prevent the
    # original encoder from creating some shares of file A and other
    # shares of file B. self.ciphertext_hash_tree was a guess before:
    # this is where we create it for real.
    self.ciphertext_hash_tree = IncompleteHashTree(self.num_segments)
    self.ciphertext_hash_tree_leaves = self.num_segments
    self.ciphertext_hash_tree.set_hashes({0: d['crypttext_root_hash']})

    self.share_hash_tree.set_hashes({0: d['share_root_hash']})
def test_pack(self):
    """Round-trip a field dict through pack_extension/unpack_extension,
    and check the base32-rendered form from unpack_extension_readable."""
    fields = {"stuff": "value",
              "size": 12,
              "needed_shares": 3,
              "big_hash": hashutil.tagged_hash("foo", "bar")}
    packed = uri.pack_extension(fields)

    # Machine-readable unpacking restores the original values exactly.
    unpacked = uri.unpack_extension(packed)
    self.failUnlessReallyEqual(unpacked["stuff"], "value")
    self.failUnlessReallyEqual(unpacked["size"], 12)
    self.failUnlessReallyEqual(unpacked["big_hash"],
                               hashutil.tagged_hash("foo", "bar"))

    # Human-readable unpacking base32-encodes hash values and adds a
    # UEB_hash entry computed over the packed bytes.
    human = uri.unpack_extension_readable(packed)
    self.failUnlessReallyEqual(human["needed_shares"], 3)
    self.failUnlessReallyEqual(human["stuff"], "value")
    self.failUnlessReallyEqual(human["size"], 12)
    self.failUnlessReallyEqual(human["big_hash"],
                               base32.b2a(hashutil.tagged_hash("foo", "bar")))
    self.failUnlessReallyEqual(human["UEB_hash"],
                               base32.b2a(hashutil.uri_extension_hash(packed)))
def test_pack(self):
    """Verify pack_extension/unpack_extension round-trip and the
    readable (base32) rendering, including the derived UEB_hash."""
    original = {"stuff": "value",
                "size": 12,
                "needed_shares": 3,
                "big_hash": hashutil.tagged_hash("foo", "bar"),
                }
    ext = uri.pack_extension(original)

    # Exact values survive the pack/unpack round trip.
    recovered = uri.unpack_extension(ext)
    self.failUnlessReallyEqual(recovered["stuff"], "value")
    self.failUnlessReallyEqual(recovered["size"], 12)
    self.failUnlessReallyEqual(recovered["big_hash"],
                               hashutil.tagged_hash("foo", "bar"))

    # The readable form keeps small fields as-is, base32-encodes hashes,
    # and includes a hash of the whole extension blob.
    readable = uri.unpack_extension_readable(ext)
    self.failUnlessReallyEqual(readable["needed_shares"], 3)
    self.failUnlessReallyEqual(readable["stuff"], "value")
    self.failUnlessReallyEqual(readable["size"], 12)
    self.failUnlessReallyEqual(readable["big_hash"],
                               base32.b2a(hashutil.tagged_hash("foo", "bar")))
    self.failUnlessReallyEqual(readable["UEB_hash"],
                               base32.b2a(hashutil.uri_extension_hash(ext)))
def _parse_and_validate(self, data):
    """Unpack the URI Extension Block and validate it against the verifycap.

    Required fields (segment_size, crypttext_root_hash, share_root_hash)
    are stored on self. Optional redundant fields are checked for
    consistency with the verifycap and otherwise ignored; a mismatch
    raises BadURIExtension. Deprecated fields are logged and ignored.
    Returns self.
    """
    self.share_size = mathutil.div_ceil(self._verifycap.size,
                                        self._verifycap.needed_shares)

    d = uri.unpack_extension(data)

    # There are several kinds of things that can be found in a UEB.
    # First, things that we really need to learn from the UEB in order to
    # do this download. Next: things which are optional but not redundant
    # -- if they are present in the UEB they will get used. Next, things
    # that are optional and redundant. These things are required to be
    # consistent: they don't have to be in the UEB, but if they are in
    # the UEB then they will be checked for consistency with the
    # already-known facts, and if they are inconsistent then an exception
    # will be raised. These things aren't actually used -- they are just
    # tested for consistency and ignored. Finally: things which are
    # deprecated -- they ought not be in the UEB at all, and if they are
    # present then a warning will be logged but they are otherwise
    # ignored.

    # First, things that we really need to learn from the UEB:
    # segment_size, crypttext_root_hash, and share_root_hash.
    self.segment_size = d['segment_size']

    self.block_size = mathutil.div_ceil(self.segment_size,
                                        self._verifycap.needed_shares)
    self.num_segments = mathutil.div_ceil(self._verifycap.size,
                                          self.segment_size)

    self.tail_data_size = self._verifycap.size % self.segment_size
    if not self.tail_data_size:
        self.tail_data_size = self.segment_size
    # padding for erasure code
    self.tail_segment_size = mathutil.next_multiple(self.tail_data_size,
                                                    self._verifycap.needed_shares)

    # Ciphertext hash tree root is mandatory, so that there is at most
    # one ciphertext that matches this read-cap or verify-cap. The
    # integrity check on the shares is not sufficient to prevent the
    # original encoder from creating some shares of file A and other
    # shares of file B.
    self.crypttext_root_hash = d['crypttext_root_hash']

    self.share_root_hash = d['share_root_hash']

    # Next: things that are optional and not redundant: crypttext_hash
    # ('in' replaces dict.has_key(), which is gone in Python 3)
    if 'crypttext_hash' in d:
        self.crypttext_hash = d['crypttext_hash']
        if len(self.crypttext_hash) != CRYPTO_VAL_SIZE:
            raise BadURIExtension('crypttext_hash is required to be hashutil.CRYPTO_VAL_SIZE bytes, not %s bytes' % (len(self.crypttext_hash),))

    # Next: things that are optional, redundant, and required to be
    # consistent: codec_name, codec_params, tail_codec_params,
    # num_segments, size, needed_shares, total_shares
    if 'codec_name' in d:
        if d['codec_name'] != "crs":
            raise UnsupportedErasureCodec(d['codec_name'])

    if 'codec_params' in d:
        ucpss, ucpns, ucpts = codec.parse_params(d['codec_params'])
        if ucpss != self.segment_size:
            raise BadURIExtension("inconsistent erasure code params: "
                                  "ucpss: %s != self.segment_size: %s" %
                                  (ucpss, self.segment_size))
        if ucpns != self._verifycap.needed_shares:
            raise BadURIExtension("inconsistent erasure code params: ucpns: %s != "
                                  "self._verifycap.needed_shares: %s" %
                                  (ucpns, self._verifycap.needed_shares))
        if ucpts != self._verifycap.total_shares:
            raise BadURIExtension("inconsistent erasure code params: ucpts: %s != "
                                  "self._verifycap.total_shares: %s" %
                                  (ucpts, self._verifycap.total_shares))

    if 'tail_codec_params' in d:
        utcpss, utcpns, utcpts = codec.parse_params(d['tail_codec_params'])
        if utcpss != self.tail_segment_size:
            raise BadURIExtension("inconsistent erasure code params: utcpss: %s != "
                                  "self.tail_segment_size: %s, self._verifycap.size: %s, "
                                  "self.segment_size: %s, self._verifycap.needed_shares: %s"
                                  % (utcpss, self.tail_segment_size,
                                     self._verifycap.size, self.segment_size,
                                     self._verifycap.needed_shares))
        if utcpns != self._verifycap.needed_shares:
            raise BadURIExtension("inconsistent erasure code params: utcpns: %s != "
                                  "self._verifycap.needed_shares: %s" %
                                  (utcpns, self._verifycap.needed_shares))
        if utcpts != self._verifycap.total_shares:
            raise BadURIExtension("inconsistent erasure code params: utcpts: %s != "
                                  "self._verifycap.total_shares: %s" %
                                  (utcpts, self._verifycap.total_shares))

    if 'num_segments' in d:
        if d['num_segments'] != self.num_segments:
            raise BadURIExtension("inconsistent num_segments: size: %s, "
                                  "segment_size: %s, computed_num_segments: %s, "
                                  "ueb_num_segments: %s" % (self._verifycap.size,
                                                            self.segment_size,
                                                            self.num_segments,
                                                            d['num_segments']))

    if 'size' in d:
        if d['size'] != self._verifycap.size:
            raise BadURIExtension("inconsistent size: URI size: %s, UEB size: %s" %
                                  (self._verifycap.size, d['size']))

    if 'needed_shares' in d:
        if d['needed_shares'] != self._verifycap.needed_shares:
            # bug fix: report the URI's needed_shares here (the original
            # mistakenly interpolated total_shares into this message)
            raise BadURIExtension("inconsistent needed shares: URI needed shares: %s, UEB "
                                  "needed shares: %s" % (self._verifycap.needed_shares,
                                                         d['needed_shares']))

    if 'total_shares' in d:
        if d['total_shares'] != self._verifycap.total_shares:
            raise BadURIExtension("inconsistent total shares: URI total shares: %s, UEB "
                                  "total shares: %s" % (self._verifycap.total_shares,
                                                        d['total_shares']))

    # Finally, things that are deprecated and ignored: plaintext_hash,
    # plaintext_root_hash
    if d.get('plaintext_hash'):
        log.msg("Found plaintext_hash in UEB. This field is deprecated for security reasons "
                "and is no longer used. Ignoring. %s" % (self,))
    if d.get('plaintext_root_hash'):
        log.msg("Found plaintext_root_hash in UEB. This field is deprecated for security "
                "reasons and is no longer used. Ignoring. %s" % (self,))

    return self
def _got_uri_extension(self, ueb):
    """Record a fetched URI extension block: its digest and its fields."""
    self.log("_got_uri_extension", level=log.NOISY)
    # Keep the hash (for later integrity checking) alongside the
    # decoded dictionary of UEB fields.
    self._ueb_hash = hashutil.uri_extension_hash(ueb)
    self._ueb_data = uri.unpack_extension(ueb)
def _parse_and_validate(self, data):
    """Unpack the URI Extension Block and validate it against the verifycap.

    Stores the mandatory fields (segment_size, crypttext_root_hash,
    share_root_hash) on self, checks every optional redundant field for
    consistency with the verifycap (raising BadURIExtension on any
    mismatch), and logs-and-ignores deprecated fields. Returns self.
    """
    self.share_size = mathutil.div_ceil(self._verifycap.size,
                                        self._verifycap.needed_shares)

    d = uri.unpack_extension(data)

    # There are several kinds of things that can be found in a UEB.
    # First, things that we really need to learn from the UEB in order to
    # do this download. Next: things which are optional but not redundant
    # -- if they are present in the UEB they will get used. Next, things
    # that are optional and redundant. These things are required to be
    # consistent: they don't have to be in the UEB, but if they are in
    # the UEB then they will be checked for consistency with the
    # already-known facts, and if they are inconsistent then an exception
    # will be raised. These things aren't actually used -- they are just
    # tested for consistency and ignored. Finally: things which are
    # deprecated -- they ought not be in the UEB at all, and if they are
    # present then a warning will be logged but they are otherwise
    # ignored.

    # First, things that we really need to learn from the UEB:
    # segment_size, crypttext_root_hash, and share_root_hash.
    self.segment_size = d['segment_size']

    self.block_size = mathutil.div_ceil(self.segment_size,
                                        self._verifycap.needed_shares)
    self.num_segments = mathutil.div_ceil(self._verifycap.size,
                                          self.segment_size)

    self.tail_data_size = self._verifycap.size % self.segment_size
    if not self.tail_data_size:
        self.tail_data_size = self.segment_size
    # padding for erasure code
    self.tail_segment_size = mathutil.next_multiple(
        self.tail_data_size,
        self._verifycap.needed_shares)

    # Ciphertext hash tree root is mandatory, so that there is at most
    # one ciphertext that matches this read-cap or verify-cap. The
    # integrity check on the shares is not sufficient to prevent the
    # original encoder from creating some shares of file A and other
    # shares of file B.
    self.crypttext_root_hash = d['crypttext_root_hash']

    self.share_root_hash = d['share_root_hash']

    # Next: things that are optional and not redundant: crypttext_hash
    # (membership tests use 'in' -- dict.has_key() was removed in Python 3)
    if 'crypttext_hash' in d:
        self.crypttext_hash = d['crypttext_hash']
        if len(self.crypttext_hash) != CRYPTO_VAL_SIZE:
            raise BadURIExtension(
                'crypttext_hash is required to be hashutil.CRYPTO_VAL_SIZE bytes, not %s bytes' % (len(self.crypttext_hash), ))

    # Next: things that are optional, redundant, and required to be
    # consistent: codec_name, codec_params, tail_codec_params,
    # num_segments, size, needed_shares, total_shares
    if 'codec_name' in d:
        if d['codec_name'] != "crs":
            raise UnsupportedErasureCodec(d['codec_name'])

    if 'codec_params' in d:
        ucpss, ucpns, ucpts = codec.parse_params(d['codec_params'])
        if ucpss != self.segment_size:
            raise BadURIExtension("inconsistent erasure code params: "
                                  "ucpss: %s != self.segment_size: %s" %
                                  (ucpss, self.segment_size))
        if ucpns != self._verifycap.needed_shares:
            raise BadURIExtension(
                "inconsistent erasure code params: ucpns: %s != "
                "self._verifycap.needed_shares: %s" %
                (ucpns, self._verifycap.needed_shares))
        if ucpts != self._verifycap.total_shares:
            raise BadURIExtension(
                "inconsistent erasure code params: ucpts: %s != "
                "self._verifycap.total_shares: %s" %
                (ucpts, self._verifycap.total_shares))

    if 'tail_codec_params' in d:
        utcpss, utcpns, utcpts = codec.parse_params(d['tail_codec_params'])
        if utcpss != self.tail_segment_size:
            raise BadURIExtension(
                "inconsistent erasure code params: utcpss: %s != "
                "self.tail_segment_size: %s, self._verifycap.size: %s, "
                "self.segment_size: %s, self._verifycap.needed_shares: %s"
                % (utcpss, self.tail_segment_size,
                   self._verifycap.size, self.segment_size,
                   self._verifycap.needed_shares))
        if utcpns != self._verifycap.needed_shares:
            raise BadURIExtension(
                "inconsistent erasure code params: utcpns: %s != "
                "self._verifycap.needed_shares: %s" %
                (utcpns, self._verifycap.needed_shares))
        if utcpts != self._verifycap.total_shares:
            raise BadURIExtension(
                "inconsistent erasure code params: utcpts: %s != "
                "self._verifycap.total_shares: %s" %
                (utcpts, self._verifycap.total_shares))

    if 'num_segments' in d:
        if d['num_segments'] != self.num_segments:
            raise BadURIExtension(
                "inconsistent num_segments: size: %s, "
                "segment_size: %s, computed_num_segments: %s, "
                "ueb_num_segments: %s" % (self._verifycap.size,
                                          self.segment_size,
                                          self.num_segments,
                                          d['num_segments']))

    if 'size' in d:
        if d['size'] != self._verifycap.size:
            raise BadURIExtension(
                "inconsistent size: URI size: %s, UEB size: %s" %
                (self._verifycap.size, d['size']))

    if 'needed_shares' in d:
        if d['needed_shares'] != self._verifycap.needed_shares:
            # bug fix: the message must show the URI's needed_shares; the
            # original interpolated total_shares here by mistake
            raise BadURIExtension(
                "inconsistent needed shares: URI needed shares: %s, UEB "
                "needed shares: %s" % (self._verifycap.needed_shares,
                                       d['needed_shares']))

    if 'total_shares' in d:
        if d['total_shares'] != self._verifycap.total_shares:
            raise BadURIExtension(
                "inconsistent total shares: URI total shares: %s, UEB "
                "total shares: %s" % (self._verifycap.total_shares,
                                      d['total_shares']))

    # Finally, things that are deprecated and ignored: plaintext_hash,
    # plaintext_root_hash
    if d.get('plaintext_hash'):
        log.msg(
            "Found plaintext_hash in UEB. This field is deprecated for security reasons "
            "and is no longer used. Ignoring. %s" % (self, ))
    if d.get('plaintext_root_hash'):
        log.msg(
            "Found plaintext_root_hash in UEB. This field is deprecated for security "
            "reasons and is no longer used. Ignoring. %s" % (self, ))

    return self