def check(self, monitor, verify=False, add_lease=False):
    """Return an already-fired Deferred holding canned, healthy CheckResults.

    The ``monitor``, ``verify`` and ``add_lease`` arguments are accepted
    but never consulted: the result always describes a healthy 3-of-10
    file whose single sharemap entry lives on one stub server.
    """
    stub_server = StubServer("\x00" * 20)
    # Every field is a fixed value; only the URI and storage index come
    # from this node.
    canned_fields = dict(
        healthy=True,
        recoverable=True,
        count_happiness=10,
        count_shares_needed=3,
        count_shares_expected=10,
        count_shares_good=10,
        count_good_share_hosts=10,
        count_recoverable_versions=1,
        count_unrecoverable_versions=0,
        servers_responding=[stub_server],
        sharemap={"seq1-abcd-sh0": [stub_server]},
        count_wrong_shares=0,
        list_corrupt_shares=[],
        count_corrupt_shares=0,
        list_incompatible_shares=[],
        count_incompatible_shares=0,
        summary="",
        report=[],
        share_problems=[],
        servermap=None,
    )
    results = CheckResults(self.my_uri, self.storage_index, **canned_fields)
    return defer.succeed(results)
def _repair_finished(repair_results):
    """Copy the outcome of a completed repair onto ``self.cr_results``.

    Stores the success flag, builds a fresh CheckResults for the
    post-repair state and has ``_fill_checker_results`` populate it from
    the repair's servermap, then attaches the raw repair results.
    Returns nothing.
    """
    succeeded = repair_results.get_successful()
    self.cr_results.repair_successful = succeeded

    node_cap = from_string(self._node.get_uri())
    post_repair = CheckResults(node_cap, self._storage_index)
    # Attach the (still empty) results first, then fill them in, exactly
    # in that order.
    self.cr_results.post_repair_results = post_repair
    self._fill_checker_results(repair_results.servermap, post_repair)

    self.cr_results.repair_results = repair_results  # TODO?
def __init__(self, node, storage_broker, history, monitor):
    """Remember the collaborators and set up empty checker state.

    The storage index is read from ``node`` once, and an initial
    CheckResults is created from the node's URI and that storage index.
    """
    # Collaborators handed in by the caller.
    self._node = node
    self._storage_broker = storage_broker
    self._history = history
    self._monitor = monitor

    # Mutable bookkeeping that is filled in while the check runs.
    self.bad_shares = []  # list of (nodeid,shnum,failure)

    storage_index = node.get_storage_index()
    self._storage_index = storage_index
    node_cap = from_string(node.get_uri())
    self.results = CheckResults(node_cap, storage_index)

    self.need_repair = False
    self.responded = set()  # set of (binary) nodeids
def _gather_repair_results(self, ur, cr, crr):
    """Build post-repair CheckResults and attach them to ``crr``.

    ``ur`` must provide IUploadResults. The share map from the pre-repair
    check (``cr``) is merged with the share map reported by the upload;
    every server named by the upload is also counted as responding.
    ``crr.repair_successful`` is set to whether the merged map holds at
    least ``total_shares`` distinct shares. Returns ``crr``.
    """
    assert IUploadResults.providedBy(ur), ur
    # clone the cr (check results) to form the basis of the
    # prr (post-repair results)
    cap = self._verifycap
    responders = set(cr.get_servers_responding())
    merged = DictOfSets()

    def _absorb(sharemap, note_responders):
        # Copy every (shnum, server) pair into the merged map.
        for shnum, holders in sharemap.items():
            for holder in holders:
                merged.add(shnum, holder)
                if note_responders:
                    responders.add(holder)

    assert isinstance(cr.get_sharemap(), DictOfSets)
    _absorb(cr.get_sharemap(), False)
    _absorb(ur.get_sharemap(), True)

    ordered_responders = sorted(responders)
    # Number of distinct servers across all shares.
    distinct_hosts = len(set().union(*merged.values()))
    share_count_ok = bool(len(merged) >= cap.total_shares)
    can_recover = bool(len(merged) >= cap.needed_shares)
    happiness = servers_of_happiness(merged)

    post_repair = CheckResults(
        cr.get_uri(),
        cr.get_storage_index(),
        healthy=share_count_ok,
        recoverable=can_recover,
        count_happiness=happiness,
        count_shares_needed=cap.needed_shares,
        count_shares_expected=cap.total_shares,
        count_shares_good=len(merged),
        count_good_share_hosts=distinct_hosts,
        count_recoverable_versions=int(can_recover),
        count_unrecoverable_versions=int(not can_recover),
        servers_responding=list(ordered_responders),
        sharemap=merged,
        count_wrong_shares=0,  # no such thing as wrong, for immutable
        list_corrupt_shares=cr.get_corrupt_shares(),
        count_corrupt_shares=len(cr.get_corrupt_shares()),
        list_incompatible_shares=cr.get_incompatible_shares(),
        count_incompatible_shares=len(cr.get_incompatible_shares()),
        summary="",
        report=[],
        share_problems=[],
        servermap=None,
    )
    crr.repair_successful = share_count_ok
    crr.post_repair_results = post_repair
    return crr
def _gather_repair_results(ur):
    """Derive post-repair check results from the upload results ``ur``.

    Closure over ``cr`` (pre-repair check results), ``crr`` (the
    check-and-repair results being filled in) and ``verifycap``.
    ``ur`` must provide IUploadResults. Returns ``crr`` with
    ``repair_successful`` and ``post_repair_results`` set.
    """
    assert IUploadResults.providedBy(ur), ur
    # clone the cr (check results) to form the basis of the
    # prr (post-repair results)
    prr = CheckResults(cr.uri, cr.storage_index)
    prr.data = copy.deepcopy(cr.data)

    sm = prr.data['sharemap']
    assert isinstance(sm, DictOfSets), sm
    sm.update(ur.sharemap)

    # set.union() returns a new set, so the previous
    # `servers_responding.union(...)` silently discarded its result.  It
    # also walked the sharemap *keys* (share numbers); the servers are in
    # the values.
    # NOTE(review): assumes ur.sharemap maps shnum -> iterable of servers,
    # as the DictOfSets merge above implies -- confirm.
    servers_responding = set(prr.data['servers-responding'])
    for servers in ur.sharemap.values():
        servers_responding.update(servers)
    prr.data['servers-responding'] = list(servers_responding)

    # Hosts are the distinct servers holding any share, not the number of
    # distinct share numbers.
    good_hosts = len(set().union(*sm.values()))
    prr.data['count-shares-good'] = len(sm)
    prr.data['count-good-share-hosts'] = good_hosts

    is_healthy = bool(len(sm) >= verifycap.total_shares)
    is_recoverable = bool(len(sm) >= verifycap.needed_shares)
    prr.set_healthy(is_healthy)
    prr.set_recoverable(is_recoverable)
    crr.repair_successful = is_healthy
    # Rebalancing is needed when fewer servers hold shares than there are
    # distinct shares; the old expression was just the "healthy" test.
    prr.set_needs_rebalancing(good_hosts < len(sm))

    crr.post_repair_results = prr
    return crr
def _make_checker_results(self, smap):
    """Summarize the servermap ``smap`` into a CheckResults object.

    Raises (via the monitor) if the operation was cancelled. The file is
    reported unhealthy when any version is unrecoverable, when the number
    of recoverable versions is not exactly one, or when the best
    recoverable version has fewer good shares than the encoding expects.
    Every entry of ``self.bad_shares`` is listed in the report, recorded
    as a corrupt-share locator and a share problem, and logged.
    """
    self._monitor.raise_if_cancelled()
    is_healthy = True
    report_lines = []
    summary_bits = []
    versionmap = smap.make_versionmap()
    recoverable = smap.recoverable_versions()
    unrecoverable = smap.unrecoverable_versions()

    def _tally(versions):
        # "<share count>*<version summary>" per version, joined by "/".
        return "/".join(["%d*%s" % (len(versionmap[v]),
                                    smap.summarize_version(v))
                         for v in versions])

    if recoverable:
        report_lines.append("Recoverable Versions: " + _tally(recoverable))
    if unrecoverable:
        report_lines.append("Unrecoverable Versions: " + _tally(unrecoverable))

    # The servermap is queried again here rather than reusing the local.
    if smap.unrecoverable_versions():
        is_healthy = False
        summary_bits.append("some versions are unrecoverable")
        report_lines.append("Unhealthy: some versions are unrecoverable")
    if not recoverable:
        is_healthy = False
        summary_bits.append("no versions are recoverable")
        report_lines.append("Unhealthy: no versions are recoverable")
    if len(recoverable) > 1:
        is_healthy = False
        summary_bits.append("multiple versions are recoverable")
        report_lines.append("Unhealthy: there are multiple recoverable versions")

    if recoverable:
        best = smap.best_recoverable_version()
        report_lines.append("Best Recoverable Version: " +
                            smap.summarize_version(best))
        counters = self._count_shares(smap, best)
        good = counters["count-shares-good"]
        needed = counters["count-shares-needed"]
        expected = counters["count-shares-expected"]
        if good < expected:
            is_healthy = False
            report_lines.append("Unhealthy: best version has only %d shares "
                                "(encoding is %d-of-%d)"
                                % (good, needed, expected))
            summary_bits.append("%d shares (enc %d-of-%d)"
                                % (good, needed, expected))
    elif unrecoverable:
        is_healthy = False
        # find a k and N from somewhere; not exactly the best version,
        # but that doesn't matter too much
        counters = self._count_shares(smap, list(unrecoverable)[0])
    else:
        # couldn't find anything at all
        counters = {
            "count-shares-good": 0,
            "count-shares-needed": 3,  # arbitrary defaults
            "count-shares-expected": 10,
            "count-good-share-hosts": 0,
            "count-wrong-shares": 0,
        }

    corrupt_locators = []
    share_problems = []
    if self.bad_shares:
        report_lines.append("Corrupt Shares:")
        summary_bits.append("Corrupt Shares:")
    log_format = ("CorruptShareError during mutable verify, "
                  "serverid=%(serverid)s, si=%(si)s, shnum=%(shnum)d, "
                  "where=%(where)s")
    for (server, shnum, failure) in sorted(self.bad_shares):
        serverid = server.get_serverid()
        corrupt_locators.append((server, self._storage_index, shnum))
        label = "%s-sh%d" % (server.get_name(), shnum)
        # A CorruptShareError carries its own reason; anything else is
        # reported via the failure's string form.
        if failure.check(CorruptShareError):
            where = failure.value.reason
        else:
            where = str(failure)
        report_lines.append(" %s: %s" % (label, where))
        summary_bits.append(label)
        share_problems.append((serverid, self._storage_index, shnum, failure))
        log.msg(format=log_format,
                serverid=server.get_name(),
                si=base32.b2a(self._storage_index),
                shnum=shnum,
                where=where,
                level=log.WEIRD, umid="EkK8QA")

    # Share ids are "<version summary>-sh<shnum>", mapped to their servers.
    sharemap = dictutil.DictOfSets()
    for verinfo in versionmap:
        for (shnum, server, _timestamp) in versionmap[verinfo]:
            share_id = "%s-sh%d" % (smap.summarize_version(verinfo), shnum)
            sharemap.add(share_id, server)

    if is_healthy:
        summary_text = "Healthy"
    else:
        summary_text = "Unhealthy: " + " ".join(summary_bits)

    happiness = servers_of_happiness(sharemap)

    return CheckResults(
        from_string(self._node.get_uri()),
        self._storage_index,
        healthy=is_healthy,
        recoverable=bool(recoverable),
        count_happiness=happiness,
        count_shares_needed=counters["count-shares-needed"],
        count_shares_expected=counters["count-shares-expected"],
        count_shares_good=counters["count-shares-good"],
        count_good_share_hosts=counters["count-good-share-hosts"],
        count_recoverable_versions=len(recoverable),
        count_unrecoverable_versions=len(unrecoverable),
        servers_responding=list(smap.get_reachable_servers()),
        sharemap=sharemap,
        count_wrong_shares=counters["count-wrong-shares"],
        list_corrupt_shares=corrupt_locators,
        count_corrupt_shares=len(corrupt_locators),
        list_incompatible_shares=[],
        count_incompatible_shares=0,
        summary=summary_text,
        report=report_lines,
        share_problems=share_problems,
        servermap=smap.copy(),
    )
def _format_results(self, results):
    """Fold per-server verification results into one CheckResults.

    ``results`` is an iterable of 5-tuples: (verified share numbers,
    server object, corrupt share numbers, incompatible share numbers,
    responded flag). The file is healthy when the number of distinct
    verified shares equals ``total_shares`` and recoverable when it is at
    least ``needed_shares``. Returns the populated CheckResults.
    """
    verifycap = self._verifycap
    storage_index = verifycap.get_storage_index()
    cr = CheckResults(verifycap, storage_index)
    d = {}
    d['count-shares-needed'] = verifycap.needed_shares
    d['count-shares-expected'] = verifycap.total_shares

    verifiedshares = dictutil.DictOfSets()  # {sharenum: set(serverid)}
    servers = {}  # {serverid: set(sharenums)}
    corruptsharelocators = []  # (serverid, storageindex, sharenum)
    incompatiblesharelocators = []  # (serverid, storageindex, sharenum)
    servers_responding = set()  # serverid

    for (theseverifiedshares, thisserver, thesecorruptshares,
         theseincompatibleshares, thisresponded) in results:
        thisserverid = thisserver.get_serverid()
        servers.setdefault(thisserverid, set()).update(theseverifiedshares)
        for sharenum in theseverifiedshares:
            verifiedshares.setdefault(sharenum, set()).add(thisserverid)
        for sharenum in thesecorruptshares:
            corruptsharelocators.append(
                (thisserverid, storage_index, sharenum))
        for sharenum in theseincompatibleshares:
            incompatiblesharelocators.append(
                (thisserverid, storage_index, sharenum))
        # Only servers whose query actually came back count as
        # responding; the flag used to be unpacked and then ignored, so
        # every queried server was listed.
        if thisresponded:
            servers_responding.add(thisserverid)

    d['count-shares-good'] = len(verifiedshares)
    d['count-good-share-hosts'] = len(
        [s for s in servers.keys() if servers[s]])

    assert len(verifiedshares) <= verifycap.total_shares, (
        verifiedshares.keys(), verifycap.total_shares)
    if len(verifiedshares) == verifycap.total_shares:
        cr.set_healthy(True)
        cr.set_summary("Healthy")
    else:
        cr.set_healthy(False)
        cr.set_summary("Not Healthy: %d shares (enc %d-of-%d)" %
                       (len(verifiedshares),
                        verifycap.needed_shares,
                        verifycap.total_shares))
    if len(verifiedshares) >= verifycap.needed_shares:
        cr.set_recoverable(True)
        d['count-recoverable-versions'] = 1
        d['count-unrecoverable-versions'] = 0
    else:
        cr.set_recoverable(False)
        d['count-recoverable-versions'] = 0
        d['count-unrecoverable-versions'] = 1

    d['servers-responding'] = list(servers_responding)
    d['sharemap'] = verifiedshares
    # no such thing as wrong shares of an immutable file
    d['count-wrong-shares'] = 0
    d['list-corrupt-shares'] = corruptsharelocators
    d['count-corrupt-shares'] = len(corruptsharelocators)
    d['list-incompatible-shares'] = incompatiblesharelocators
    d['count-incompatible-shares'] = len(incompatiblesharelocators)

    # The file needs rebalancing if the set of servers that have at least
    # one share is less than the number of uniquely-numbered shares
    # available.
    cr.set_needs_rebalancing(
        d['count-good-share-hosts'] < d['count-shares-good'])

    cr.set_data(d)
    return cr
def _format_results(self, results):
    """Fold per-server verification results into one CheckResults.

    ``results`` is an iterable of 5-tuples: (verified share numbers,
    server, corrupt share numbers, incompatible share numbers, responded
    flag). Only servers whose flag is true are reported as responding.
    The file is healthy when the number of distinct verified shares
    equals ``total_shares`` and recoverable when it is at least
    ``needed_shares``.
    """
    cap = self._verifycap
    storage_index = cap.get_storage_index()

    shares_by_num = dictutil.DictOfSets()  # {sharenum: set(server)}
    shares_by_server = {}  # {server: set(sharenums)}
    corrupt_locators = []  # (server, storageindex, sharenum)
    incompatible_locators = []  # (server, storageindex, sharenum)
    responders = set()  # server

    for verified, server, corrupt, incompatible, responded in results:
        shares_by_server.setdefault(server, set()).update(verified)
        for sharenum in verified:
            shares_by_num.setdefault(sharenum, set()).add(server)
        corrupt_locators.extend(
            [(server, storage_index, sharenum) for sharenum in corrupt])
        incompatible_locators.extend(
            [(server, storage_index, sharenum) for sharenum in incompatible])
        if responded:
            responders.add(server)

    # Servers that hold at least one verified share.
    hosts_with_shares = sum(1 for held in shares_by_server.values() if held)

    assert len(shares_by_num) <= cap.total_shares, (
        shares_by_num.keys(), cap.total_shares)

    is_healthy = len(shares_by_num) == cap.total_shares
    if is_healthy:
        summary_text = "Healthy"
    else:
        summary_text = ("Not Healthy: %d shares (enc %d-of-%d)"
                        % (len(shares_by_num),
                           cap.needed_shares,
                           cap.total_shares))

    # An immutable file has exactly one version: it is either recoverable
    # or it is not.
    can_recover = len(shares_by_num) >= cap.needed_shares
    recoverable_versions = 1 if can_recover else 0
    unrecoverable_versions = 1 - recoverable_versions

    happiness = servers_of_happiness(shares_by_num)

    return CheckResults(
        cap,
        storage_index,
        healthy=is_healthy,
        recoverable=bool(recoverable_versions),
        count_happiness=happiness,
        count_shares_needed=cap.needed_shares,
        count_shares_expected=cap.total_shares,
        count_shares_good=len(shares_by_num),
        count_good_share_hosts=hosts_with_shares,
        count_recoverable_versions=recoverable_versions,
        count_unrecoverable_versions=unrecoverable_versions,
        servers_responding=list(responders),
        sharemap=shares_by_num,
        count_wrong_shares=0,  # no such thing, for immutable
        list_corrupt_shares=corrupt_locators,
        count_corrupt_shares=len(corrupt_locators),
        list_incompatible_shares=incompatible_locators,
        count_incompatible_shares=len(incompatible_locators),
        summary=summary_text,
        report=[],
        share_problems=[],
        servermap=None,
    )
def _gather_repair_results(ur):
    """Derive post-repair check results from the upload results ``ur``.

    Closure over ``cr`` (pre-repair check results), ``crr`` (the
    check-and-repair results being filled in) and ``verifycap``.
    ``ur`` must provide IUploadResults. Returns ``crr`` with
    ``repair_successful`` and ``post_repair_results`` set.
    """
    assert IUploadResults.providedBy(ur), ur
    # clone the cr (check results) to form the basis of the
    # prr (post-repair results)
    prr = CheckResults(cr.uri, cr.storage_index)
    prr.data = copy.deepcopy(cr.data)

    sm = prr.data['sharemap']
    assert isinstance(sm, DictOfSets), sm
    sm.update(ur.sharemap)

    # set.union() returns a new set, so the previous
    # `servers_responding.union(...)` silently discarded its result.  It
    # also walked the sharemap *keys* (share numbers); the servers are in
    # the values.
    # NOTE(review): assumes ur.sharemap maps shnum -> iterable of servers,
    # as the DictOfSets merge above implies -- confirm.
    servers_responding = set(prr.data['servers-responding'])
    for servers in ur.sharemap.values():
        servers_responding.update(servers)
    prr.data['servers-responding'] = list(servers_responding)

    # Hosts are the distinct servers holding any share, not the number of
    # distinct share numbers.
    good_hosts = len(set().union(*sm.values()))
    prr.data['count-shares-good'] = len(sm)
    prr.data['count-good-share-hosts'] = good_hosts

    is_healthy = bool(len(sm) >= verifycap.total_shares)
    is_recoverable = bool(len(sm) >= verifycap.needed_shares)
    prr.set_healthy(is_healthy)
    prr.set_recoverable(is_recoverable)
    crr.repair_successful = is_healthy
    # Rebalancing is needed when fewer servers hold shares than there are
    # distinct shares; the old expression was just the "healthy" test.
    prr.set_needs_rebalancing(good_hosts < len(sm))

    crr.post_repair_results = prr
    return crr
def _format_results(self, results):
    """Fold per-server verification results into one CheckResults.

    ``results`` is an iterable of 5-tuples: (verified share numbers,
    server id, corrupt share numbers, incompatible share numbers,
    responded flag). The file is healthy when the number of distinct
    verified shares equals ``total_shares`` and recoverable when it is at
    least ``needed_shares``. Returns the populated CheckResults.
    """
    verifycap = self._verifycap
    storage_index = verifycap.get_storage_index()
    cr = CheckResults(verifycap, storage_index)
    d = {}
    d['count-shares-needed'] = verifycap.needed_shares
    d['count-shares-expected'] = verifycap.total_shares

    verifiedshares = dictutil.DictOfSets()  # {sharenum: set(serverid)}
    servers = {}  # {serverid: set(sharenums)}
    corruptsharelocators = []  # (serverid, storageindex, sharenum)
    incompatiblesharelocators = []  # (serverid, storageindex, sharenum)
    servers_responding = set()  # serverid

    for (theseverifiedshares, thisserverid, thesecorruptshares,
         theseincompatibleshares, thisresponded) in results:
        servers.setdefault(thisserverid, set()).update(theseverifiedshares)
        for sharenum in theseverifiedshares:
            verifiedshares.setdefault(sharenum, set()).add(thisserverid)
        for sharenum in thesecorruptshares:
            corruptsharelocators.append(
                (thisserverid, storage_index, sharenum))
        for sharenum in theseincompatibleshares:
            incompatiblesharelocators.append(
                (thisserverid, storage_index, sharenum))
        # Only servers whose query actually came back count as
        # responding; the flag used to be unpacked and then ignored, so
        # every queried server was listed.
        if thisresponded:
            servers_responding.add(thisserverid)

    d['count-shares-good'] = len(verifiedshares)
    d['count-good-share-hosts'] = len(
        [s for s in servers.keys() if servers[s]])

    assert len(verifiedshares) <= verifycap.total_shares, (
        verifiedshares.keys(), verifycap.total_shares)
    if len(verifiedshares) == verifycap.total_shares:
        cr.set_healthy(True)
        cr.set_summary("Healthy")
    else:
        cr.set_healthy(False)
        cr.set_summary("Not Healthy: %d shares (enc %d-of-%d)" %
                       (len(verifiedshares),
                        verifycap.needed_shares,
                        verifycap.total_shares))
    if len(verifiedshares) >= verifycap.needed_shares:
        cr.set_recoverable(True)
        d['count-recoverable-versions'] = 1
        d['count-unrecoverable-versions'] = 0
    else:
        cr.set_recoverable(False)
        d['count-recoverable-versions'] = 0
        d['count-unrecoverable-versions'] = 1

    d['servers-responding'] = list(servers_responding)
    d['sharemap'] = verifiedshares
    # no such thing as wrong shares of an immutable file
    d['count-wrong-shares'] = 0
    d['list-corrupt-shares'] = corruptsharelocators
    d['count-corrupt-shares'] = len(corruptsharelocators)
    d['list-incompatible-shares'] = incompatiblesharelocators
    d['count-incompatible-shares'] = len(incompatiblesharelocators)

    # The file needs rebalancing if the set of servers that have at least
    # one share is less than the number of uniquely-numbered shares
    # available.
    cr.set_needs_rebalancing(
        d['count-good-share-hosts'] < d['count-shares-good'])

    cr.set_data(d)
    return cr