def _check_results(rres):
    self.assertThat(IRepairResults.providedBy(rres), Equals(True))
    self.assertThat(rres.get_successful(), Equals(True))
    # TODO: examine results

    self.copy_shares()

    initial_shares = self.old_shares[0]
    new_shares = self.old_shares[1]
    # TODO: this really shouldn't change anything. When we implement
    # a "minimal-bandwidth" repairer, change this test to assert:
    #self.assertThat(new_shares, Equals(initial_shares))

    # all shares should be in the same place as before
    self.assertThat(set(initial_shares.keys()),
                    Equals(set(new_shares.keys())))
    # but they should all be at a newer seqnum. The IV will be
    # different, so the roothash will be too.
    for key in initial_shares:
        (version0, seqnum0, root_hash0, IV0,
         k0, N0, segsize0, datalen0, o0) = unpack_header(initial_shares[key])
        (version1, seqnum1, root_hash1, IV1,
         k1, N1, segsize1, datalen1, o1) = unpack_header(new_shares[key])
        self.assertThat(version0, Equals(version1))
        self.assertThat(seqnum0 + 1, Equals(seqnum1))
        self.assertThat(k0, Equals(k1))
        self.assertThat(N0, Equals(N1))
        self.assertThat(segsize0, Equals(segsize1))
        self.assertThat(datalen0, Equals(datalen1))
def _got_results_one_share(self, shnum, data, peerid, lp):
    self.log(format="_got_results: got shnum #%(shnum)d from peerid %(peerid)s",
             shnum=shnum,
             peerid=idlib.shortnodeid_b2a(peerid),
             level=log.NOISY,
             parent=lp)

    # this might raise NeedMoreDataError, if the pubkey and signature
    # live at some weird offset. That shouldn't happen, so I'm going to
    # treat it as a bad share.
    (seqnum, root_hash, IV, k, N, segsize, datalength,
     pubkey_s, signature, prefix) = unpack_prefix_and_signature(data)

    if not self._node.get_pubkey():
        fingerprint = hashutil.ssk_pubkey_fingerprint_hash(pubkey_s)
        assert len(fingerprint) == 32
        if fingerprint != self._node.get_fingerprint():
            raise CorruptShareError(peerid, shnum,
                                    "pubkey doesn't match fingerprint")
        self._node._populate_pubkey(self._deserialize_pubkey(pubkey_s))

    if self._need_privkey:
        self._try_to_extract_privkey(data, peerid, shnum, lp)

    (ig_version, ig_seqnum, ig_root_hash, ig_IV, ig_k, ig_N,
     ig_segsize, ig_datalen, offsets) = unpack_header(data)
    offsets_tuple = tuple([(key, value) for key, value in offsets.items()])

    verinfo = (seqnum, root_hash, IV, segsize, datalength, k, N, prefix,
               offsets_tuple)

    if verinfo not in self._valid_versions:
        # it's a new pair. Verify the signature.
        valid = self._node.get_pubkey().verify(prefix, signature)
        if not valid:
            raise CorruptShareError(peerid, shnum, "signature is invalid")

        # ok, it's a valid verinfo. Add it to the list of validated
        # versions.
        self.log(" found valid version %d-%s from %s-sh%d: %d-%d/%d/%d"
                 % (seqnum, base32.b2a(root_hash)[:4],
                    idlib.shortnodeid_b2a(peerid), shnum,
                    k, N, segsize, datalength),
                 parent=lp)
        self._valid_versions.add(verinfo)
    # We now know that this is a valid candidate verinfo.

    if (peerid, shnum) in self._servermap.bad_shares:
        # we've been told that the rest of the data in this share is
        # unusable, so don't add it to the servermap.
        self.log("but we've been told this is a bad share",
                 parent=lp, level=log.UNUSUAL)
        return verinfo

    # Add the info to our servermap.
    timestamp = time.time()
    self._servermap.add_new_share(peerid, shnum, verinfo, timestamp)
    # and the versionmap
    self.versionmap.add(verinfo, (shnum, peerid, timestamp))
    return verinfo
def get_roothash_for(self, index):
    # return the roothash for the first share we see in the saved set
    shares = self._copied_shares[index]
    for peerid in shares:
        for shnum in shares[peerid]:
            share = shares[peerid][shnum]
            (version, seqnum, root_hash, IV, k, N, segsize, datalen, o) = \
                unpack_header(share)
            return root_hash
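# For reference, a minimal sketch (not part of the original code) of how
# these call sites consume unpack_header: the nine-element return value,
# ending with the offsets dict, is inferred from the unpacking patterns
# above; show_header and the path "fn" are hypothetical.
from allmydata.storage.mutable import MutableShareFile
from allmydata.mutable.layout import unpack_header

def show_header(fn):
    # fn: path to an on-disk mutable (SDMF) share file
    m = MutableShareFile(fn)
    with open(fn, "rb") as f:
        f.seek(m.DATA_OFFSET)
        data = f.read(2000)  # enough to cover the fixed-size header
    (version, seqnum, root_hash, IV, k, N, segsize, datalen,
     offsets) = unpack_header(data)
    print("version=%d seqnum=%d k=%d N=%d segsize=%d datalen=%d"
          % (version, seqnum, k, N, segsize, datalen))
    # offsets maps section names (e.g. "share_data", "enc_privkey", "EOF")
    # to byte positions relative to the start of the share data
    for name, value in sorted(offsets.items(), key=lambda kv: kv[1]):
        print("  %-20s %d" % (name, value))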
def corrupt_share(options):
    import random
    from allmydata.storage.mutable import MutableShareFile
    from allmydata.storage.immutable import ShareFile
    from allmydata.mutable.layout import unpack_header
    from allmydata.immutable.layout import ReadBucketProxy
    out = options.stdout
    fn = options['filename']
    assert options["offset"] == "block-random", "other offsets not implemented"

    # first, what kind of share is it?

    def flip_bit(start, end):
        offset = random.randrange(start, end)
        bit = random.randrange(0, 8)
        print("[%d..%d): %d.b%d" % (start, end, offset, bit), file=out)
        f = open(fn, "rb+")
        f.seek(offset)
        d = f.read(1)
        d = bchr(ord(d) ^ 0x01)
        f.seek(offset)
        f.write(d)
        f.close()

    with open(fn, "rb") as f:
        prefix = f.read(32)

    if MutableShareFile.is_valid_header(prefix):
        # mutable
        m = MutableShareFile(fn)
        with open(fn, "rb") as f:
            f.seek(m.DATA_OFFSET)
            # Read enough data to get a mutable header to unpack.
            data = f.read(2000)
        # make sure this slot contains an SDMF share
        assert data[0:1] == b"\x00", "non-SDMF mutable shares not supported"

        (version, ig_seqnum, ig_roothash, ig_IV, ig_k, ig_N, ig_segsize,
         ig_datalen, offsets) = unpack_header(data)

        assert version == 0, "we only handle v0 SDMF files"
        start = m.DATA_OFFSET + offsets["share_data"]
        end = m.DATA_OFFSET + offsets["enc_privkey"]
        flip_bit(start, end)
    else:
        # otherwise assume it's immutable
        f = ShareFile(fn)
        bp = ReadBucketProxy(None, None, '')
        offsets = bp._parse_offsets(f.read_share_data(0, 0x24))
        start = f._data_offset + offsets["data"]
        end = f._data_offset + offsets["plaintext_hash_tree"]
        flip_bit(start, end)
def _corrupt_mutable_share_data(data, debug=False):
    prefix = data[:32]
    assert prefix == MutableShareFile.MAGIC, \
        "This function is designed to corrupt mutable shares of v1, and the magic number doesn't look right: %r vs %r" % (prefix, MutableShareFile.MAGIC)
    data_offset = MutableShareFile.DATA_OFFSET
    sharetype = data[data_offset:data_offset+1]
    assert sharetype == "\x00", "non-SDMF mutable shares not supported"
    (version, ig_seqnum, ig_roothash, ig_IV, ig_k, ig_N, ig_segsize,
     ig_datalen, offsets) = unpack_header(data[data_offset:])
    assert version == 0, "this function only handles v0 SDMF files"
    start = data_offset + offsets["share_data"]
    length = data_offset + offsets["enc_privkey"] - start
    return corrupt_field(data, start, length)
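# A minimal in-memory variant of the corruption above (illustrative sketch,
# not part of the original code): it combines the share_data/enc_privkey
# region calculation from _corrupt_mutable_share_data with the single-bit
# flip used by corrupt_share; "data" is assumed to be the raw bytes of a
# v0 SDMF mutable share file.
import random
from allmydata.storage.mutable import MutableShareFile
from allmydata.mutable.layout import unpack_header

def _flip_one_bit_in_block_data(data):
    data_offset = MutableShareFile.DATA_OFFSET
    (version, seqnum, root_hash, IV, k, N, segsize, datalen,
     offsets) = unpack_header(data[data_offset:])
    assert version == 0, "this sketch only handles v0 SDMF shares"
    # the block-data region spans [share_data, enc_privkey) within the share
    start = data_offset + offsets["share_data"]
    end = data_offset + offsets["enc_privkey"]
    where = random.randrange(start, end)
    # indexing bytes gives an int (Python 3); flip one random bit in it
    flipped = data[where] ^ (1 << random.randrange(0, 8))
    return data[:where] + bytes([flipped]) + data[where+1:]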
def corrupt_share(options):
    import random
    from allmydata.storage.mutable import MutableShareFile
    from allmydata.storage.immutable import ShareFile
    from allmydata.mutable.layout import unpack_header
    from allmydata.immutable.layout import ReadBucketProxy
    out = options.stdout
    fn = options['filename']
    assert options["offset"] == "block-random", "other offsets not implemented"

    # first, what kind of share is it?

    def flip_bit(start, end):
        offset = random.randrange(start, end)
        bit = random.randrange(0, 8)
        print >>out, "[%d..%d): %d.b%d" % (start, end, offset, bit)
        f = open(fn, "rb+")
        f.seek(offset)
        d = f.read(1)
        d = chr(ord(d) ^ 0x01)
        f.seek(offset)
        f.write(d)
        f.close()

    f = open(fn, "rb")
    prefix = f.read(32)
    f.close()
    if prefix == MutableShareFile.MAGIC:
        # mutable
        m = MutableShareFile(fn)
        f = open(fn, "rb")
        f.seek(m.DATA_OFFSET)
        data = f.read(2000)
        # make sure this slot contains an SDMF share
        assert data[0] == "\x00", "non-SDMF mutable shares not supported"
        f.close()

        (version, ig_seqnum, ig_roothash, ig_IV, ig_k, ig_N, ig_segsize,
         ig_datalen, offsets) = unpack_header(data)

        assert version == 0, "we only handle v0 SDMF files"
        start = m.DATA_OFFSET + offsets["share_data"]
        end = m.DATA_OFFSET + offsets["enc_privkey"]
        flip_bit(start, end)
    else:
        # otherwise assume it's immutable
        f = ShareFile(fn)
        bp = ReadBucketProxy(None, '', '')
        offsets = bp._parse_offsets(f.read_share_data(0, 0x24))
        start = f._data_offset + offsets["data"]
        end = f._data_offset + offsets["plaintext_hash_tree"]
        flip_bit(start, end)
def dump_SDMF_share(m, length, options):
    from allmydata.mutable.layout import unpack_share, unpack_header
    from allmydata.mutable.common import NeedMoreDataError
    from allmydata.util import base32, hashutil
    from allmydata.uri import SSKVerifierURI
    from allmydata.util.encodingutil import quote_output, to_bytes

    offset = m.DATA_OFFSET

    out = options.stdout

    f = open(options['filename'], "rb")
    f.seek(offset)
    data = f.read(min(length, 2000))
    f.close()

    try:
        pieces = unpack_share(data)
    except NeedMoreDataError as e:
        # retry once with the larger size
        size = e.needed_bytes
        f = open(options['filename'], "rb")
        f.seek(offset)
        data = f.read(min(length, size))
        f.close()
        pieces = unpack_share(data)

    (seqnum, root_hash, IV, k, N, segsize, datalen,
     pubkey, signature, share_hash_chain, block_hash_tree,
     share_data, enc_privkey) = pieces
    (ig_version, ig_seqnum, ig_roothash, ig_IV, ig_k, ig_N, ig_segsize,
     ig_datalen, offsets) = unpack_header(data)

    print(" SDMF contents:", file=out)
    print(" seqnum: %d" % seqnum, file=out)
    print(" root_hash: %s" % unicode(base32.b2a(root_hash), "utf-8"), file=out)
    print(" IV: %s" % unicode(base32.b2a(IV), "utf-8"), file=out)
    print(" required_shares: %d" % k, file=out)
    print(" total_shares: %d" % N, file=out)
    print(" segsize: %d" % segsize, file=out)
    print(" datalen: %d" % datalen, file=out)
    print(" enc_privkey: %d bytes" % len(enc_privkey), file=out)
    print(" pubkey: %d bytes" % len(pubkey), file=out)
    print(" signature: %d bytes" % len(signature), file=out)
    share_hash_ids = ",".join(sorted([str(hid)
                                      for hid in share_hash_chain.keys()]))
    print(" share_hash_chain: %s" % share_hash_ids, file=out)
    print(" block_hash_tree: %d nodes" % len(block_hash_tree), file=out)

    # the storage index isn't stored in the share itself, so we depend upon
    # knowing the parent directory name to get it
    pieces = options['filename'].split(os.sep)
    if len(pieces) >= 2:
        piece = to_bytes(pieces[-2])
        if base32.could_be_base32_encoded(piece):
            storage_index = base32.a2b(piece)
            fingerprint = hashutil.ssk_pubkey_fingerprint_hash(pubkey)
            u = SSKVerifierURI(storage_index, fingerprint)
            verify_cap = u.to_string()
            print(" verify-cap:", quote_output(verify_cap, quotemarks=False),
                  file=out)

    if options['offsets']:
        # NOTE: this offset-calculation code is fragile, and needs to be
        # merged with MutableShareFile's internals.
        print(file=out)
        print(" Section Offsets:", file=out)
        def printoffset(name, value, shift=0):
            print("%s%20s: %s (0x%x)" % (" " * shift, name, value, value),
                  file=out)
        printoffset("first lease", m.HEADER_SIZE)
        printoffset("share data", m.DATA_OFFSET)
        o_seqnum = m.DATA_OFFSET + struct.calcsize(">B")
        printoffset("seqnum", o_seqnum, 2)
        o_root_hash = m.DATA_OFFSET + struct.calcsize(">BQ")
        printoffset("root_hash", o_root_hash, 2)
        for k in ["signature", "share_hash_chain", "block_hash_tree",
                  "share_data", "enc_privkey", "EOF"]:
            name = {"share_data": "block data",
                    "EOF": "end of share data"}.get(k, k)
            offset = m.DATA_OFFSET + offsets[k]
            printoffset(name, offset, 2)
        f = open(options['filename'], "rb")
        printoffset("extra leases", m._read_extra_lease_offset(f) + 4)
        f.close()

    print(file=out)
try:
    pieces = unpack_share(data)
except NeedMoreDataError, e:
    # retry once with the larger size
    size = e.needed_bytes
    f = open(options['filename'], "rb")
    f.seek(offset)
    data = f.read(min(length, size))
    f.close()
    pieces = unpack_share(data)

(seqnum, root_hash, IV, k, N, segsize, datalen,
 pubkey, signature, share_hash_chain, block_hash_tree,
 share_data, enc_privkey) = pieces
(ig_version, ig_seqnum, ig_roothash, ig_IV, ig_k, ig_N, ig_segsize,
 ig_datalen, offsets) = unpack_header(data)

print >>out, " SDMF contents:"
print >>out, " seqnum: %d" % seqnum
print >>out, " root_hash: %s" % base32.b2a(root_hash)
print >>out, " IV: %s" % base32.b2a(IV)
print >>out, " required_shares: %d" % k
print >>out, " total_shares: %d" % N
print >>out, " segsize: %d" % segsize
print >>out, " datalen: %d" % datalen
print >>out, " enc_privkey: %d bytes" % len(enc_privkey)
print >>out, " pubkey: %d bytes" % len(pubkey)
print >>out, " signature: %d bytes" % len(signature)
share_hash_ids = ",".join(sorted([str(hid)
                                  for hid in share_hash_chain.keys()]))
print >>out, " share_hash_chain: %s" % share_hash_ids
def _generate_shares(self, shares_and_shareids):
    # this sets self.shares and self.root_hash
    self.log("_generate_shares")
    self._status.set_status("Generating Shares")
    started = time.time()

    # we should know these by now
    privkey = self._privkey
    encprivkey = self._encprivkey
    pubkey = self._pubkey

    (shares, share_ids) = shares_and_shareids

    assert len(shares) == len(share_ids)
    assert len(shares) == self.total_shares
    all_shares = {}
    block_hash_trees = {}
    share_hash_leaves = [None] * len(shares)
    for i in range(len(shares)):
        share_data = shares[i]
        shnum = share_ids[i]
        all_shares[shnum] = share_data

        # build the block hash tree. SDMF has only one leaf.
        leaves = [hashutil.block_hash(share_data)]
        t = hashtree.HashTree(leaves)
        block_hash_trees[shnum] = list(t)
        share_hash_leaves[shnum] = t[0]
    for leaf in share_hash_leaves:
        assert leaf is not None
    share_hash_tree = hashtree.HashTree(share_hash_leaves)
    share_hash_chain = {}
    for shnum in range(self.total_shares):
        needed_hashes = share_hash_tree.needed_hashes(shnum)
        share_hash_chain[shnum] = dict([(i, share_hash_tree[i])
                                        for i in needed_hashes])
    root_hash = share_hash_tree[0]
    assert len(root_hash) == 32
    self.log("my new root_hash is %s" % base32.b2a(root_hash))
    self._new_version_info = (self._new_seqnum, root_hash, self.salt)

    prefix = pack_prefix(self._new_seqnum, root_hash, self.salt,
                         self.required_shares, self.total_shares,
                         self.segment_size, len(self.newdata))

    # now pack the beginning of the share. All shares are the same up
    # to the signature, then they have divergent share hash chains,
    # then completely different block hash trees + salt + share data,
    # then they all share the same encprivkey at the end. The sizes
    # of everything are the same for all shares.

    sign_started = time.time()
    signature = privkey.sign(prefix)
    self._status.timings["sign"] = time.time() - sign_started

    verification_key = pubkey.serialize()

    final_shares = {}
    for shnum in range(self.total_shares):
        final_share = pack_share(prefix,
                                 verification_key,
                                 signature,
                                 share_hash_chain[shnum],
                                 block_hash_trees[shnum],
                                 all_shares[shnum],
                                 encprivkey)
        final_shares[shnum] = final_share
    elapsed = time.time() - started
    self._status.timings["pack"] = elapsed
    self.shares = final_shares
    self.root_hash = root_hash

    # we also need to build up the version identifier for what we're
    # pushing. Extract the offsets from one of our shares.
    assert final_shares
    offsets = unpack_header(final_shares.values()[0])[-1]
    offsets_tuple = tuple([(key, value) for key, value in offsets.items()])
    verinfo = (self._new_seqnum, root_hash, self.salt, self.segment_size,
               len(self.newdata), self.required_shares, self.total_shares,
               prefix, offsets_tuple)
    self.versioninfo = verinfo
def dump_SDMF_share(m, length, options):
    from allmydata.mutable.layout import unpack_share, unpack_header
    from allmydata.mutable.common import NeedMoreDataError
    from allmydata.util import base32, hashutil
    from allmydata.uri import SSKVerifierURI
    from allmydata.util.encodingutil import quote_output, to_str

    offset = m.DATA_OFFSET

    out = options.stdout

    f = open(options['filename'], "rb")
    f.seek(offset)
    data = f.read(min(length, 2000))
    f.close()

    try:
        pieces = unpack_share(data)
    except NeedMoreDataError as e:
        # retry once with the larger size
        size = e.needed_bytes
        f = open(options['filename'], "rb")
        f.seek(offset)
        data = f.read(min(length, size))
        f.close()
        pieces = unpack_share(data)

    (seqnum, root_hash, IV, k, N, segsize, datalen,
     pubkey, signature, share_hash_chain, block_hash_tree,
     share_data, enc_privkey) = pieces
    (ig_version, ig_seqnum, ig_roothash, ig_IV, ig_k, ig_N, ig_segsize,
     ig_datalen, offsets) = unpack_header(data)

    print(" SDMF contents:", file=out)
    print(" seqnum: %d" % seqnum, file=out)
    print(" root_hash: %s" % base32.b2a(root_hash), file=out)
    print(" IV: %s" % base32.b2a(IV), file=out)
    print(" required_shares: %d" % k, file=out)
    print(" total_shares: %d" % N, file=out)
    print(" segsize: %d" % segsize, file=out)
    print(" datalen: %d" % datalen, file=out)
    print(" enc_privkey: %d bytes" % len(enc_privkey), file=out)
    print(" pubkey: %d bytes" % len(pubkey), file=out)
    print(" signature: %d bytes" % len(signature), file=out)
    share_hash_ids = ",".join(sorted([str(hid)
                                      for hid in share_hash_chain.keys()]))
    print(" share_hash_chain: %s" % share_hash_ids, file=out)
    print(" block_hash_tree: %d nodes" % len(block_hash_tree), file=out)

    # the storage index isn't stored in the share itself, so we depend upon
    # knowing the parent directory name to get it
    pieces = options['filename'].split(os.sep)
    if len(pieces) >= 2:
        piece = to_str(pieces[-2])
        if base32.could_be_base32_encoded(piece):
            storage_index = base32.a2b(piece)
            fingerprint = hashutil.ssk_pubkey_fingerprint_hash(pubkey)
            u = SSKVerifierURI(storage_index, fingerprint)
            verify_cap = u.to_string()
            print(" verify-cap:", quote_output(verify_cap, quotemarks=False),
                  file=out)

    if options['offsets']:
        # NOTE: this offset-calculation code is fragile, and needs to be
        # merged with MutableShareFile's internals.
        print(file=out)
        print(" Section Offsets:", file=out)
        def printoffset(name, value, shift=0):
            print("%s%20s: %s (0x%x)" % (" "*shift, name, value, value),
                  file=out)
        printoffset("first lease", m.HEADER_SIZE)
        printoffset("share data", m.DATA_OFFSET)
        o_seqnum = m.DATA_OFFSET + struct.calcsize(">B")
        printoffset("seqnum", o_seqnum, 2)
        o_root_hash = m.DATA_OFFSET + struct.calcsize(">BQ")
        printoffset("root_hash", o_root_hash, 2)
        for k in ["signature", "share_hash_chain", "block_hash_tree",
                  "share_data", "enc_privkey", "EOF"]:
            name = {"share_data": "block data",
                    "EOF": "end of share data"}.get(k, k)
            offset = m.DATA_OFFSET + offsets[k]
            printoffset(name, offset, 2)
        f = open(options['filename'], "rb")
        printoffset("extra leases", m._read_extra_lease_offset(f) + 4)
        f.close()

    print(file=out)
def _check_results(rres):
    self.failUnless(IRepairResults.providedBy(rres))
    self.failUnless(rres.get_successful())
    # TODO: examine results

    self.copy_shares()

    initial_shares = self.old_shares[0]
    new_shares = self.old_shares[1]
    # TODO: this really shouldn't change anything. When we implement
    # a "minimal-bandwidth" repairer, change this test to assert:
    # self.failUnlessEqual(new_shares, initial_shares)

    # all shares should be in the same place as before
    self.failUnlessEqual(set(initial_shares.keys()),
                         set(new_shares.keys()))
    # but they should all be at a newer seqnum. The IV will be
    # different, so the roothash will be too.
    for key in initial_shares:
        (version0, seqnum0, root_hash0, IV0,
         k0, N0, segsize0, datalen0, o0) = unpack_header(initial_shares[key])
        (version1, seqnum1, root_hash1, IV1,
         k1, N1, segsize1, datalen1, o1) = unpack_header(new_shares[key])
        self.failUnlessEqual(version0, version1)
        self.failUnlessEqual(seqnum0 + 1, seqnum1)
        self.failUnlessEqual(k0, k1)
        self.failUnlessEqual(N0, N1)
        self.failUnlessEqual(segsize0, segsize1)
        self.failUnlessEqual(datalen0, datalen1)