def get_status_for_source(self, context, source):
    source_file = state_filename(context, source, source)
    source_states = persistent_dict.PersistentDict(source_file)
    self.logger.info(self.inspect_source(context, source, source_states))
    for replica in self.config.get_replicas_for_context(context):
        replica_file = state_filename(context, source, replica)
        replica_states = persistent_dict.PersistentDict(replica_file)
        msg = self.inspect_replica(replica, source_states, replica_states)
        self.logger.info(f" {msg}")
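# Not shown in this section: the state_filename() helper used by
# get_status_for_source() and get_status_for_replica().  A minimal sketch,
# inferred from the literal paths built elsewhere in this code
# (f"{path}/.ghetto_cluster/{hostname}.{context}.json"); the argument names
# and exact layout here are assumptions, and the real helper may differ.
def state_filename(context, state_holder, peer):
    path = config.path_for(state_holder)    # host whose tree stores the state files
    hostname = config.host_for(peer)        # host whose file states are recorded
    return f"{path}/.ghetto_cluster/{hostname}.{context}.json"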
def test_lazyio(self): pd = persistent_dict.PersistentDict("testfile.txt", 5) pd.set("thing", 1) pd.set("thing two", "two") pd2 = persistent_dict.PersistentDict("testfile.txt") with self.assertRaises(KeyError): pd2.get("thing") time.sleep(6) pd.set("thing", 2) pd2.read() self.assertEqual(pd2.get("thing two"), "two")
def test_lazyio(self): pd = persistent_dict.PersistentDict("testfile.txt", lazy_write=5) pd["thing"] = 1 pd["thing two"] = "two" pd2 = persistent_dict.PersistentDict("testfile.txt") print(pd.data) print(pd2.data) with self.assertRaises(KeyError): print(pd2["thing two"]) time.sleep(6) pd["thing"] = 2 pd2.read() self.assertEquals(pd2["thing two"], "two")
def get_status_for_replica(self, context, replica, brief=False):
    source = self.config.get_source_for_context(context)
    source_file = state_filename(context, replica, source)
    source_states = persistent_dict.PersistentDict(source_file)
    replica_file = state_filename(context, replica, replica)
    self.logger.debug(f"replica file: {replica_file}")
    replica_states = persistent_dict.PersistentDict(replica_file)
    msg = self.inspect_replica(replica, source_states, replica_states, brief)
    self.logger.info(msg)
    if not brief:
        (files, nbytes) = sizeof(source_states)
        self.logger.info(f" Source: {source}: "
                         f"{files} files, {nbytes/2**30:.2f}GB")
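# Also not shown: the sizeof() helper called above.  A minimal sketch,
# assuming each state entry carries the "size" field seen in
# inspect_replica(); the real implementation may differ.
def sizeof(states):
    nfiles = nbytes = 0
    for fqde, state in states.items():
        nfiles += 1
        nbytes += state["size"]
    return (nfiles, nbytes)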
def __init__(self, context):
    super().__init__()
    self.context = context
    self.logger = logging.getLogger(utils.logger_str(__class__) \
                                    + " " + context)
    # self.logger.setLevel(logging.INFO)
    self.config = config.Config.instance()
    self.copies = int(self.config.get(self.context, "copies", 2))
    self.path = config.path_for(self.config.get(self.context, "source"))
    self.scanner = scanner.Scanner(self.context, self.path)
    lazy_write = utils.str_to_duration(
        self.config.get(context, "LAZY WRITE", 5))
    # TODO: support expiration
    self.rescan = utils.str_to_duration(
        self.config.get(self.context, "rescan"))
    self.clients = persistent_dict.PersistentDict(
        f"/tmp/cb.s{context}.json.bz2", lazy_write=lazy_write,
        cls=lock.Lock, expiry=self.rescan)
    self.drains = elapsed.ExpiringDict(300)    # NOT persistent!
    self.locks = locker.Locker(5)
    # TODO: timers should relate to a configurable cycle time
    self.bailout = False
    self.stats = {'claims': 0, 'drops': 0}
    self.handling = False
def __init__(self, path, ignorals, state_filename):
    self.logger = logging.getLogger("gc.scanner")
    self.config = config.Config.instance()
    self.path = path
    self.ignorals = ignorals
    self.states = persistent_dict.PersistentDict(state_filename, \
                        self.config.getOption("LAZY_WRITE", 5))
def __init__(self, context):
    super().__init__(context)
    self.source = self.config.get_source_for_context(context)
    self.path = config.path_for(self.source)
    persistent_dict_file = f"{self.path}/.ghetto_cluster/" \
                           f"source.{context}.json"
    self.states = persistent_dict.PersistentDict(persistent_dict_file, \
                        self.config.getOption("LAZY_WRITE", 5))
    self.logger = logging.getLogger(logger_str(__class__))
def test_dirty(self): pd = persistent_dict.PersistentDict("testfile.txt") pd.set("one", 1) pd.set("two", 1) pd.set("three", 1) # print(pd.items()) pd.clear_dirtybits() pd.set("three", 3) pd.set("two", 2) self.assertEqual(pd.clean_keys()[0], "one")
def test_dirty(self): pd = persistent_dict.PersistentDict("testfile.txt") pd["one"] = 1 pd["two"] = 1 pd["three"] = 1 # print(pd.items()) pd.clear_dirtybits() pd["three"] = 3 pd["two"] = 2 self.assertEquals(pd.clean_keys()[0], "one")
def __init__(self, context, source, testing=False):
    self.logger = logging.getLogger("gc.GhettoClusterSource")
    self.config = config.Config.instance()
    self.context = context
    self.source = source
    self.testing = testing
    self.verbose = self.config.getOption("verbose", "False") == "True"
    self.path = config.path_for(source)
    hostname = config.host_for(source)
    self.states_filename = f"{self.path}/.gc/{hostname}.{context}.json"
    self.states = persistent_dict.PersistentDict(self.states_filename, \
                        self.config.getOption("LAZY_WRITE", 5))
def test_metadata(self):
    metadata = {1: "one", 3: "three"}
    pd = persistent_dict.PersistentDict("testfile.txt", \
                                        metadata_key="__metadata__")
    pd.set("one", 1)
    pd.set("red", 2)
    self.assertEqual(len(pd.items()), 2)
    pd.set("__metadata__", metadata)
    # print(pd.items())
    # print(f"{pd.items()}: {len(pd.items())}")
    self.assertEqual(len(pd.items()), 2)    # metadata is excluded from items()
    more_data = pd.get("__metadata__")
    self.assertEqual(more_data, metadata)
def __init__(self, context, dest, source):
    super().__init__(context)
    self.dest = dest
    if not source.endswith("/"):
        self.source = source + "/"
    else:
        self.source = source
    hostname = config.host_for(dest)
    self.path = config.path_for(dest)
    self.states_filename = f"{self.path}/.ghetto_cluster/" \
                           f"{hostname}.{context}.json"
    self.states = persistent_dict.PersistentDict(self.states_filename, \
                        self.config.getOption("LAZY_WRITE", 5))
    self.testing = False
    self.verbose = self.config.getOption("verbose", "False") == "True"
    self.logger = logging.getLogger(logger_str(__class__))
def get_status(self, context, source, to_console=True):
    prefix = f"{config.path_for(source)}/.ghetto_cluster/"
    source_file = f"{prefix}source.{context}.json"
    source_states = persistent_dict.PersistentDict(source_file)
    (files, nbytes) = self.sizeof(source_states)
    # print(f"Source: {context}: {config.path_for(source)} " + \
    print(f"Source: {source}: " + \
          f"{files} files, {nbytes/2**30:.2f}GB")
    # TODO: pretty this
    source = self.config.get_source_for_context(context)
    for replica in self.config.get_replicas_for_context(context):
        msg = self.inspect_replica(source, source_states, context, replica)
        if to_console:
            print(msg)
        else:
            self.logger.info(msg)
    if to_console:
        print()
def test_item(self):
    pd = persistent_dict.PersistentDict("testfile.txt")
    pd["one"] = "1"
    self.assertEqual(pd["one"], "1")
    pd["two"] = "2"
    self.assertTrue("two" in pd)
def test_io(self): pd = persistent_dict.PersistentDict("testfile.txt") pd.set("thing", 1) pd.set("thing two", "two") pd2 = persistent_dict.PersistentDict("testfile.txt") self.assertEqual(pd2.get("thing two"), "two")
def inspect_replica(self, source, source_states, context, replica):
    prefix = f"{config.path_for(source)}/.ghetto_cluster/"
    hostname = config.host_for(replica)
    replica_file = f"{prefix}/{hostname}.{context}.json"
    replica_states = persistent_dict.PersistentDict(replica_file)
    if self.config.getOption("verbose", "False") == "True":
        # verbose: enumerate (up to 10) missing / mismatched files
        msg = f"{replica} ::\n"
        missing = mismatch = extra = 0
        lines = 0
        for fqde, source_state in source_states.items():
            if replica_states.contains_p(fqde):
                replica_states.touch(fqde)
                replica_state = replica_states.get(fqde)
                if source_state["size"] != replica_state["size"]:
                    mismatch += 1
                    if lines < 10:
                        msg += f"\tmismatch: {fqde} "
                        if replica_state["ctime"] > source_state["ctime"]:
                            msg += "replica is newer"
                        else:
                            # TODO: tell how stale it is
                            msg += f"{duration_to_str(source_state['ctime'] - replica_state['ctime'])} stale"
                        msg += "\n"
                        lines += 1
            else:
                missing += 1
                if lines < 10:
                    msg += f"\tmissing: {fqde}\n"
                    lines += 1
            if lines == 10:
                msg += "\t...\n"
                lines = 11
        extra = len(replica_states.clean_keys())
        if missing + mismatch + extra != 0:
            pct = 100 * len(replica_states.items()) / len(source_states.items())
            if pct > 100:
                pct = 100
            if int(pct) == 100:
                pct = 99
            msg += f"\tmissing: {missing} ({pct:.0f}% complete); " + \
                   f"mismatched: {mismatch}; " + \
                   f"extra: {extra}"
        else:
            # TODO: staleness report
            # msg = self.check_replica_staleness(source, source_states, context, replica) + msg[:-4]
            msg = "Complete: " + msg[:-4]
    else:
        # non-verbose: summarize by comparing file counts
        (target_files, target_bytes) = self.sizeof(source_states)
        (nlines, nbytes) = self.sizeof(replica_states)
        pct_complete = int(100 * nlines / target_files)
        if nlines == target_files:
            # msg = self.check_replica_staleness(source, source_states, replica_file) + f": {replica}"
            if self.replica_is_current(source, source_states, replica_file):
                msg = f" Complete : {replica}"
            else:
                msg = f" Stale: {replica}"
        elif nlines == 0:
            msg = f" Not started: {replica}"
        elif nlines < target_files:
            if self.replica_is_current(source, source_states, replica_file):
                msg = " Active: "
            else:
                msg = " Stale: "
            msg += f"{pct_complete:3d}% {nlines}/{target_files}: {replica}"
        else:
            msg = f"WARNING: too many files in replica " + \
                  f"{config.host_for(replica)}\n" + \
                  f"\t{nlines}/{target_files}: {replica}"
    return msg
def test_io(self): pd = persistent_dict.PersistentDict("testfile.txt") pd["thing"] = 1 pd["thing two"] = "two" pd2 = persistent_dict.PersistentDict("testfile.txt") self.assertEquals(pd2["thing two"], "two")