def cmd_push(self):
    """Propose the local head as the next agreed value and advance ``prev``.

    The head value (as read by ``get_head_and_config``) is proposed through
    ``propose_value`` on the instance keyed by the current ``prev`` value.

    Returns:
        True when the value handed back by ``propose_value`` equals the
        local head: the head file is then copied over the prev file, the
        remote prev file is updated on every service and a ``Proposer`` for
        the new value's path is created.  False (after logging an error)
        when a different value comes back; nothing is modified in that case.
    """
    previous = self.get_prev_value()
    candidate = self.get_head_and_config()
    agreed = self.propose_value(previous, candidate)
    # print("val: ", val, "newval: ", newvalue, "prev: ", prev)

    if agreed == candidate:
        # Disabled ancestry check kept from the original:
        # with open(self.path_master) as f:
        #     master_head = f.read().strip()
        # with open(self.get_head()) as f:
        #     head = f.read().strip()
        # if(len(master_head) > 0):
        #     head_history = self.get_history()
        #     if(not master_head in head_history):
        #         dbg.err("You should update first")
        #         self.unlock_master()
        #         return False
        # check master is ancestor of the head

        # Local bookkeeping first: prev now mirrors head.
        shutil.copyfile(self.get_head(), self.get_prev())

        # Then schedule the upload of the new prev file to every service.
        local_prev = self.get_prev()
        remote_prev = self.get_remote_path(self.get_prev_name())
        self._update_all(local_prev, remote_prev)

        # Create the proposer for the instance named after the new value.
        from paxos import Proposer
        next_instance = self.get_pPaxos_path(candidate)
        self.proposer = Proposer(None, self.services, next_instance)

        # Wait for the scheduled jobs before reporting success.
        self._join()
        return True

    dbg.err("You should fetch first")
    return False
def __init__(self, config, globalConfig, client_sock=None):
    """Set up the initial state and the Paxos roles of this object.

    Args:
        config: configuration stored on the instance and handed to both the
            ``Proposer`` and the ``Acceptor``.
        globalConfig: configuration stored on the instance and handed to
            the ``Proposer`` only.
        client_sock: accepted for interface compatibility; not used here.
    """
    # Configuration handles.
    self.config = config
    self.globalConfig = globalConfig

    # Plain state.
    self.init_balance = 100
    self.inPaxos = False
    self.blockchain = []
    self.set = []

    # Paxos roles (``config`` is the same object as ``self.config``).
    self.proposer = Proposer(config, globalConfig)
    self.acceptor = Acceptor(config)
def test_paxos_latency(metasync, opts):
    """Run a single Paxos proposal against the "onedrive" service.

    Makes sure the lock file exists on the service, proposes the value
    ``"1"`` as client ``"1"`` and asserts that this value is the one that
    comes back.

    Args:
        metasync: unused here; kept for the common test signature.
        opts: unused here; kept for the common test signature.
    """
    lock = "locktest/ltest_latency"

    import services
    srvs = ["onedrive"]
    # Build a real list: the instances are iterated below AND handed to the
    # Proposer afterwards.  A lazy ``map`` object (Python 3) would already be
    # exhausted by the loop, leaving the Proposer without any service.
    srvs_instance = [services.factory(name) for name in srvs]

    for srv in srvs_instance:
        if not srv.exists(lock):
            srv.put(lock, '')

    from paxos import Proposer
    proposer = Proposer("1", srvs_instance, lock)
    val = proposer.propose("1")
    assert val == "1"
    proposer.join()
def cmd_push(self):
    """Propose the local head as the next agreed value and advance ``prev``.

    Returns:
        True when ``propose_value`` hands back exactly the local head value;
        the head file is then copied over the prev file, the remote prev
        file is updated on all services and a ``Proposer`` for the new
        value's path is created.  False, after logging an error, when a
        different value comes back.
    """
    old_value = self.get_prev_value()
    head_value = self.get_head_and_config()
    decided = self.propose_value(old_value, head_value)

    if decided == head_value:
        # Disabled ancestry check kept from the original:
        # with open(self.path_master) as f:
        #     master_head = f.read().strip()
        # with open(self.get_head()) as f:
        #     head = f.read().strip()
        # if(len(master_head) > 0):
        #     head_history = self.get_history()
        #     if(not master_head in head_history):
        #         dbg.err("You should update first")
        #         self.unlock_master()
        #         return False
        # check master is ancestor of the head

        # prev <- head, locally.
        shutil.copyfile(self.get_head(), self.get_prev())

        # Schedule the upload of the refreshed prev file to every service.
        prev_path = self.get_prev()
        prev_remote = self.get_remote_path(self.get_prev_name())
        self._update_all(prev_path, prev_remote)

        # Create the proposer for the instance named after the new value.
        from paxos import Proposer
        self.proposer = Proposer(
            None, self.services, self.get_pPaxos_path(head_value))

        # Block until the scheduled jobs are done.
        self._join()
        return True

    dbg.err("You should fetch first")
    return False
def __init__(self, pid):
    """Initialise the process and its lazily created Paxos role tables.

    Args:
        pid: forwarded unchanged to the parent class initialiser.

    The ``proposers``, ``acceptors`` and ``learners`` tables create a role
    object on first access of a missing key; each new object receives the
    value of ``self.process_count`` at the time of that access.
    """
    super(PaxosProcess, self).__init__(pid)
    self.process_count = 0

    # Factories read ``self.process_count`` when called, not when defined.
    def _new_proposer():
        return Proposer(self.process_count)

    def _new_acceptor():
        return Acceptor(self.process_count)

    def _new_learner():
        return Learner(self.process_count)

    self.proposers = defaultdict(_new_proposer)
    self.acceptors = defaultdict(_new_acceptor)
    self.learners = defaultdict(_new_learner)

    # Request queues start out empty.
    self.client_requests = []
    self.internal_requests = []
def test_paxos(metasync, opts):
    """Test paxos with disk_api.

    Initialises the test repo through ``test_init``, makes sure the lock
    file exists on every service of ``metasync`` and checks that proposing
    ``"1"`` as client ``"1"`` yields ``"1"``.
    """
    lock_path = 'locktest/ltest'

    test_init(metasync, opts)
    backends = metasync.services

    # Create the (empty) lock file wherever it is still missing.
    for backend in backends:
        if srv_has_lock(backend, lock_path):
            continue
        backend.put(lock_path, '')

    from paxos import Proposer
    proposer = Proposer("1", backends, lock_path)
    outcome = proposer.propose("1")
    assert outcome == "1"
    proposer.join()


def srv_has_lock(srv, lock_path):
    """Return the result of ``srv.exists`` for ``lock_path``."""
    return srv.exists(lock_path)
def test_paxos_services(metasync, opts):
    """Test paxos with services.

    Makes sure the lock file exists on the "google", "box" and "dropbox"
    services, then proposes ``"1"`` as client ``"1"`` and asserts that this
    value is the one returned.

    Args:
        metasync: unused here; kept for the common test signature.
        opts: unused here; kept for the common test signature.
    """
    # init tmp repo to play with
    #test_init(metasync, opts)

    # init lock primitives
    lock = 'locktest/ltest2'
    targets = ["google", "box", "dropbox"]
    # Materialise the instances: they are iterated here and then passed to
    # the Proposer.  A lazy ``map`` object (Python 3) would be exhausted by
    # the loop and the Proposer would be given no services at all.
    srvs = [services.factory(target) for target in targets]
    for srv in srvs:
        if not srv.exists(lock):
            srv.put(lock, '')

    from paxos import Proposer
    proposer = Proposer("1", srvs, lock)
    val = proposer.propose("1")
    assert val == "1"
    proposer.join()
def propose_value(self, prev, newvalue):
    """Propose ``newvalue`` on the instance keyed by ``prev``.

    A new ``Proposer`` is created for this client on the remote path
    ``pPaxos/<prev>`` and kept in ``self.proposer``.

    Args:
        prev: value naming the instance; appended to ``"pPaxos/"``.
        newvalue: value handed to ``Proposer.propose``.

    Returns:
        Whatever ``Proposer.propose(newvalue)`` returns.
    """
    from paxos import Proposer

    instance_path = self.get_remote_path("pPaxos/" + prev)
    proposer = Proposer(self.clientid, self.services, instance_path)
    self.proposer = proposer
    return proposer.propose(newvalue)
def cmd_init(self, namespace, backend=None, nreplicas=None, encrypt_key=None):
    """Create a new repo: local meta directories, config and remote layout.

    Args:
        namespace: stored as ``core.namespace`` in the config.
        backend: passed to ``_get_conf_services`` to obtain the services
            string (comma separated).
        nreplicas: passed to ``_get_conf_nreplicas`` together with the
            number of configured services.
        encrypt_key: passed to ``_get_conf_encryptkey``.

    Returns:
        True on success.  False when the repo already passes
        ``check_sanity`` or when ``_load`` raises ``NameError`` (the meta
        directory is removed again in that case).
    """
    # already initialized?
    if self.check_sanity():
        dbg.err("already initialized %s (%s)"
                % (self.path_root, self.namespace))
        return False

    os.mkdir(self.path_meta)
    os.mkdir(self.path_objs)

    # build config opts
    conf = util.new_config()

    # core: unique/permanent info about local machine (often called client)
    #   NOTE. not sure if encryption_key should be in core, or unchangable
    conf.add_section('core')
    conf.set('core', 'namespace', namespace)
    conf.set('core', 'clientid', util.gen_uuid())
    conf.set('core', 'encryptkey', _get_conf_encryptkey(encrypt_key))

    # backend: info about sync service providers
    # XXX: Error handling
    conf.add_section('backend')
    try:
        # Named ``service_names`` so the module-level ``services`` name is
        # not shadowed inside this function.
        service_names = _get_conf_services(backend)
        conf.set('backend', 'services', service_names)
        conf.set('backend', 'nreplicas',
                 _get_conf_nreplicas(nreplicas, len(service_names.split(","))))
    except Exception as err:
        # Still best effort (initialisation goes on as before), but the
        # failure is reported instead of vanishing, and KeyboardInterrupt /
        # SystemExit are no longer swallowed by a bare ``except``.
        dbg.err("failed to configure backend: %s" % err)

    # flush
    with open(self.path_conf, "w") as fd:
        conf.write(fd)

    try:
        self._load()
    except NameError:
        shutil.rmtree(self.path_meta)
        return False

    # put config into remote
    # (client-specific options are stripped from the shared copy)
    conf.remove_option('core', 'clientid')
    conf.remove_option('core', 'encryptkey')

    with io.BytesIO() as out:
        conf.write(out)
        val = out.getvalue()
        configname = util.sha1(val)
        self._put_all_content(
            val, self.get_remote_path("configs/%s" % configname[:6]), True)

    #temporary --- move this to pPaxos
    #self._put_all_content(configname[:6], self.get_remote_path("config"), True)

    # Format for master: headhash.config[:6].version
    prev_master = "." + configname[:6] + ".0"

    # do we need both? or shall we put them into a file together.
    with open(self.get_head(), "w") as f:
        f.write(prev_master)
    with open(self.get_prev(), "w") as f:
        f.write(prev_master)
    self._put_all_dir(self.get_remote_path("objects"))

    # change to put_content
    self._put_all(self.get_head(), self.get_remote_path(self.get_head_name()))
    self._put_all(self.get_prev(), self.get_remote_path(self.get_prev_name()))

    from paxos import Proposer
    self.proposer = Proposer(None, self.services,
                             self.get_pPaxos_path(prev_master))
    self._join()

    return True
def propose_value(self, prev, newvalue):
    """Run one proposal of ``newvalue`` on the instance named by ``prev``.

    The ``Proposer`` built for this call (client id, current services and
    the remote path ``pPaxos/<prev>``) is stored in ``self.proposer``.

    Returns:
        The result of ``Proposer.propose(newvalue)``.
    """
    from paxos import Proposer

    remote_instance = self.get_remote_path("pPaxos/" + prev)
    self.proposer = Proposer(self.clientid, self.services, remote_instance)
    result = self.proposer.propose(newvalue)
    return result
class MetaSync:
    def __init__(self, root, opts=None):
        """Locate the repo that contains ``root`` and load it if it is sane.

        Args:
            root: a path inside (or at) the repo; see ``_find_root``.
            opts: optional options object; only its ``nthreads`` attribute
                is read by the methods in this part of the class.
        """
        #
        #  repo/.metasync/
        #   ^    ^
        #   |    +-- meta
        #   +-- root

        # useful path info (path_meta must be set before get_path is used)
        self.path_root = self._find_root(root)
        self.path_meta = os.path.join(self.path_root, META_DIR)
        self.path_conf = self.get_path("config")
        self.path_objs = self.get_path("objects")
        self.path_master = self.get_path("master")
        self.path_head_history = self.get_path("head_history")
        self.options = opts

        # local blob store
        self.blobstore = BlobStore2(self)  # BlobStore(self.path_objs)

        # load on demand
        self.config = None
        self.srvmap = {}
        self.scheduler = None
        self.translators = []
        self.mapping = None

        # post init
        self._load()

    def _find_root(self, curpath):
        """Return the closest ancestor of ``curpath`` holding ``META_DIR``.

        The ``~/.metasync`` directory is never accepted as a repo marker.
        When no ancestor qualifies, the absolute form of ``curpath`` is
        returned.
        """
        start = os.path.abspath(curpath)
        auth_dir = os.path.join(os.path.expanduser("~"), ".metasync")

        candidate = start
        while True:
            meta = os.path.join(candidate, META_DIR)
            if meta != auth_dir and os.path.exists(meta):
                return candidate
            parent, tail = os.path.split(candidate)
            if tail == "":
                # reached the top without finding a repo
                return start
            candidate = parent

    @property
    def services(self):
        """The service objects currently registered in ``srvmap``."""
        return self.srvmap.values()

    # load member variables from config
    def _load(self):
        """Populate config, services, scheduler, translators and mapping.

        Does nothing when ``check_sanity`` fails.  The mapping is read from
        ``mapping.pcl`` in the meta directory when that file exists;
        otherwise it is built from the services' storage info and written
        to that file.
        """
        if not self.check_sanity():
            return

        if not os.path.exists(AUTH_DIR):
            os.mkdir(AUTH_DIR)

        # load config
        self.config = util.load_config(self.path_conf)
        self.namespace = self.config.get("core", "namespace")
        self.clientid = self.config.get("core", "clientid")

        # load services from config
        self.srvmap = {}
        for token in self.config.get("backend", "services").split(","):
            service = services.factory(token)
            self.srvmap[service.sid()] = service

        self.nreplicas = int(self.config.get("backend", "nreplicas"))

        if self.options is not None:
            nthreads = self.options.nthreads
        else:
            nthreads = 2
        self.scheduler = Scheduler(self.services,
                                   (nthreads + 1) * len(self.srvmap))

        # load translator pipe
        if self.is_encypted():
            self.translators.append(translators.TrEncrypt(self))

        # TODO. for integrity option
        # if self.is_signed():
        #     self.translators.append(TrSigned(self))

        started = time.time()
        mapping_path = self.get_path("mapping.pcl")
        if os.path.exists(mapping_path):
            with open(mapping_path) as f:
                self.mapping = pickle.load(f)
        else:
            mapconfig = [(service.sid(), service.info_storage()/GB)
                         for service in self.services]
            hspacesum = sum(capacity for _sid, capacity in mapconfig)
            hspace = max(hspacesum + 1, 1024)
            self.mapping = DetMap2(mapconfig, hspace=hspace,
                                   replica=self.nreplicas)
            self.mapping.pack()
            with open(mapping_path, "w") as f:
                pickle.dump(self.mapping, f)
        finished = time.time()
        dbg.time("mapping init %s" % (finished - started))
        dbg.dbg("head: %s", self.get_head_name())

    def cmd_reconfigure(self, backends, replica):
        """Switch to a new set of backends and move objects accordingly.

        Args:
            backends: comma separated service tokens for
                ``services.factory``.
            replica: string that is evaluated to obtain the replica
                argument of ``mapping.reconfig``.

        Objects reported as added by ``mapping.get_remapping`` are uploaded
        to the new services; objects reported as removed are deleted from
        the services currently in ``self.srvmap``.
        """
        new_srvmap = {}
        for token in backends.split(","):
            service = services.factory(token)
            new_srvmap[service.sid()] = service

        mapconfig = [(service.sid(), service.info_storage()/GB)
                     for service in new_srvmap.values()]

        # One lock per service id, new or old.
        lock_dic = {}
        for sid in new_srvmap:
            lock_dic[sid] = threading.Lock()
            if sid not in self.srvmap:
                # services new to this repo get their objects directory
                new_srvmap[sid].putdir(self.get_remote_path("objects"))
        for sid in self.srvmap:
            if sid not in lock_dic:
                lock_dic[sid] = threading.Lock()

        started = time.time()
        # NOTE(review): eval() on a caller-supplied string executes
        # arbitrary code; kept as in the original -- confirm whether
        # int(replica) would be sufficient.
        self.mapping.reconfig(mapconfig, eval(replica))
        finished = time.time()
        dbg.info("remap: %.3fs" % (finished - started))

        started = time.time()
        lst_objs = self.blobstore.list()
        added, removed = self.mapping.get_remapping(lst_objs)
        if self.options is not None:
            nthreads = self.options.nthreads
        else:
            nthreads = 2

        #REFACTOR
        def _put_next(srv, lst, lock):
            # Worker: drain ``lst`` (shared between workers, guarded by
            # ``lock``) and upload every object to ``srv``.
            dbg.job("submitted to: %s" % srv)
            while True:
                with lock:
                    if len(lst) == 0:
                        break
                    hv = lst.pop()
                if hv is None:
                    continue
                with open(self.get_local_obj_path(hv), "rb") as f:
                    blob = f.read()
                for tr in self.translators:
                    blob = tr.put(blob)
                # XXX HACK: retry with doubling delay until put() succeeds
                backoff = 0.5
                remote_path = self.get_remote_obj_path(hv)
                while not srv.put(remote_path, blob):
                    time.sleep(backoff)
                    backoff *= 2

        def _rm_next(srv, lst, lock):
            # Worker: drain ``lst`` and remove every object from ``srv``.
            dbg.job("submitted to: %s" % srv)
            while True:
                with lock:
                    if len(lst) == 0:
                        break
                    hv = lst.pop()
                if hv is not None:
                    srv.rm(self.get_remote_obj_path(hv))

        cnt_added = 0
        for sid in added:
            pending = added[sid]
            if len(pending) == 0:
                continue
            cnt_added += len(pending)
            for _ in range(nthreads):
                self.scheduler.submit(new_srvmap[sid], False, _put_next,
                                      pending, lock_dic[sid])
        self._join()
        finished = time.time()
        dbg.info("remap put: %.3fs" % (finished - started))

        started = time.time()
        cnt_removed = 0
        for sid in removed:
            pending = removed[sid]
            if len(pending) == 0:
                continue
            cnt_removed += len(pending)
            for _ in range(nthreads):
                # removals are submitted for the services in self.srvmap
                self.scheduler.submit(self.srvmap[sid], False, _rm_next,
                                      pending, lock_dic[sid])
        self._join()
        finished = time.time()
        dbg.info("remap rm: %.3fs" % (finished - started))
        dbg.info("added %d, removed %d" % (cnt_added, cnt_removed))

    # config-related parser
    def is_encypted(self):
        """True when ``core.encryptkey`` is non-empty after stripping."""
        return self.config.get('core', 'encryptkey').strip() != ""

    # handling dir/path names
    def get_path(self, path):
        """Join ``path`` onto the meta directory."""
        return os.path.join(self.path_meta, path)

    def get_head(self):
        """Local path of this client's head file."""
        return self.get_path(self.get_head_name())

    def get_head_name(self):
        """File name of this client's head file (``head_<client id>``)."""
        return "head_%s" % self.get_client_id()

    def get_head_value(self):
        """First dot-separated field of the stripped head file content."""
        with open(self.get_head()) as f:
            content = f.read()
            return content.strip().split(".")[0]

    def get_head_and_config(self):
        """Stripped content of the head file."""
        with open(self.get_head()) as f:
            content = f.read()
            return content.strip()

    def get_prev(self):
        """Local path of this client's prev file."""
        return self.get_path(self.get_prev_name())

    def get_prev_name(self):
        """File name of this client's prev file (``prev_<client id>``)."""
        return "prev_%s" % self.get_client_id()

    def get_prev_value(self):
        """Stripped content of the prev file."""
        with open(self.get_prev()) as f:
            content = f.read()
            return content.strip()

    def get_next_version(self):
        """Third dot-separated field of the prev file, as int, plus one."""
        with open(self.get_prev()) as f:
            fields = f.read().strip().split(".")
            return int(fields[2]) + 1

    #XXX: Cache?
def get_config_hash(self):
    """Second dot-separated field of the stripped head file content."""
    with open(self.get_head()) as f:
        fields = f.read().strip().split(".")
        return fields[1]

def get_client_id(self):
    """The client id stored on this instance."""
    return self.clientid

def get_relative_path(self, path):
    """``path`` made absolute, then expressed relative to the repo root."""
    absolute = os.path.abspath(path)
    return os.path.relpath(absolute, self.path_root)

def get_local_path(self, *path):
    """Join the components (stripped of "/") onto the repo root."""
    parts = [component.strip("/") for component in path]
    return os.path.join(self.path_root, *parts)

def get_local_obj_path(self, hv):
    """Path of object ``hv`` inside the local objects directory."""
    return os.path.join(self.path_objs, hv)

def get_remote_path(self, *path):
    """"/"-join namespace and components; trailing "/" then "\\" removed."""
    #return os.path.join(self.namespace, *path).rstrip("/")
    joined = "/".join([self.namespace] + list(path))
    return joined.rstrip("/").rstrip("\\")

def get_remote_obj_path(self, *hashes):
    """Remote path below ``objects`` for the given components."""
    return self.get_remote_path("objects", *hashes)

def get_root_blob(self):
    """The root blob as reported by the blob store."""
    return self.blobstore.get_root_blob()

# check basic sanity of repo's meta info
def check_sanity(self, whynot=False):
    """Check that the meta dir, config file and objects dir all exist.

    Args:
        whynot: when true, the reason for a failure is written to stderr.

    Returns:
        True when all three paths exist, False otherwise.
    """
    required = (
        (self.path_meta, "Can't find the root of repo (%s)"),
        (self.path_conf, "Can't find config (%s)"),
        (self.path_objs, "Can't find objects store (%s)"),
    )
    for location, complaint in required:
        if os.path.exists(location):
            continue
        if whynot:
            sys.stderr.write("%s\n" % (complaint % location))
        return False
    return True

# schedule-related
def _put_all_content(self, content, remote_path, serial=False):
    """Schedule ``put(remote_path, content)`` on every service.

    ``serial`` is passed through as the second argument of
    ``scheduler.submit``.
    """
    def _put(srv):
        #dbg.job("submitted to: %s" % srv)
        srv.put(remote_path, content)

    # submit jobs
    for service in self.services:
        self.scheduler.submit(service, serial, _put)

def _put_all_dir(self, remote_path):
    """Schedule ``putdir(remote_path)`` on every service."""
    # XXX. handle errs
    def _putdir(srv):
        srv.putdir(remote_path)

    # submit jobs
    for service in self.services:
        self.scheduler.submit(service, True, _putdir)

def _put_all(self, path, remote_path):
    """Schedule an upload of local file ``path`` via ``put`` on every service.

    The file is read when the job runs, not when it is submitted.
    """
    # XXX. handle errs
    def _put(srv):
        with open(path, "rb") as f:
            payload = f.read()
            srv.put(remote_path, payload)

    # submit jobs
    for service in self.services:
        self.scheduler.submit(service, True, _put)

def _update_all(self, path, remote_path):
    """Schedule an upload of local file ``path`` via ``update`` on every service.

    The file is read when the job runs, not when it is submitted.
    """
    # XXX. handle errs
    def _update(srv):
        #dbg.job("submitted to: %s" % srv)
        with open(path, "rb") as f:
            #print 'start to put'
            payload = f.read()
            srv.update(remote_path, payload)
            #print 'put ends'

    # submit jobs
    for service in self.services:
        self.scheduler.submit(service, True, _update)

def _join(self):
    """Call ``join`` on the scheduler."""
    self.scheduler.join()

def _get(self, srv, path, remote_path):
    """Schedule a download of ``remote_path`` from ``srv`` into ``path``.

    The job retries the fetch once, after one second, when the first
    attempt yields None, then runs the blob through the translators in
    reverse order before writing it.
    """
    def _fetch(srv, path, remote_path):
        dbg.job("submitted to: %s (%s)" % (srv, path))
        with open(path, "wb") as f:
            blob = srv.get(remote_path)
            if blob is None:
                time.sleep(1)
                blob = srv.get(remote_path)
            for tr in reversed(self.translators):
                blob = tr.get(blob)
            f.write(blob)

    self.scheduler.submit(srv, False, _fetch, path, remote_path)

# bstore-related
def bstore_download(self):
    """Schedule downloads of remote objects that are missing locally.

    For every service the remote objects directory is listed (retrying
    while the listing is None); names not present in the local blob store
    are queued for that service.  Worker jobs then fetch each name once
    across all services.  The jobs are only submitted here; this method
    does not wait for them.
    """
    # TODO, handle when R > 1
    local_names = self.blobstore.list()
    #dbg.dbg("lst files:%s" % lst)

    lock = threading.Lock()

    def _get_next(srv, hash_dic, lock, allset, srvname):
        if len(hash_dic[srvname]) == 0:
            return
        while True:
            lock.acquire()
            try:
                hv = hash_dic[srvname].pop()
                remaining = len(hash_dic[srvname])
                if remaining % 10 == 0:
                    dbg.dbg("%s left %d" % (srvname, remaining))
                if hv in allset:
                    # already claimed through another service
                    hv = None
                else:
                    allset.add(hv)
            except:
                # pop() on an empty queue (or any other error raised in the
                # block above) ends this worker
                lock.release()
                break
            lock.release()

            if hv is None:
                continue

            remote_path = self.get_remote_obj_path(hv)
            local_path = os.path.join(self.path_objs, hv)
            with open(local_path, "wb") as f:
                backoff = 0.5
                while True:
                    blob = srv.get(remote_path)
                    if blob is not None:
                        break
                    dbg.dbg("back off %s" % srvname)
                    time.sleep(backoff)
                    backoff *= 2

                for tr in reversed(self.translators):
                    blob = tr.get(blob)
                f.write(blob)

    hash_dic = {}
    allset = set([])
    for service in self.services:
        queue_key = str(service)
        hash_dic[queue_key] = []
        srvlist = service.listdir(self.get_remote_obj_path())
        backoff = 1
        while srvlist is None:
            dbg.dbg("back off - listdir %s" % str(service))
            time.sleep(backoff)
            srvlist = service.listdir(self.get_remote_obj_path())

        for hashname in srvlist:
            if hashname in local_names:
                #dbg.dbg("%s is already in bstore" % hashname)
                continue
            hash_dic[queue_key].append(hashname)

    if self.options is not None:
        nthreads = self.options.nthreads
    else:
        nthreads = 2
    for service in self.services:
        dbg.dbg("%s:%d dn" % (str(service), len(hash_dic[str(service)])))
        ##HACK
        for _ in range(nthreads):
            self.scheduler.submit(service, False, _get_next, hash_dic, lock,
                                  allset, str(service))

def bstore_sync_left(self, hashdic):
    """Upload the objects queued per service in ``hashdic`` and wait.

    Args:
        hashdic: maps a key of ``self.srvmap`` to the list of object names
            to upload to that service.  The lists are consumed.

    Returns immediately when all lists are empty.
    """
    total = sum(len(hashdic[key]) for key in hashdic)
    if total == 0:
        return

    def _put_next(srv, lst, lock):
        # Worker: drain ``lst`` (guarded by ``lock``) and upload to ``srv``.
        dbg.job("submitted to: %s" % srv)
        while True:
            with lock:
                if len(lst) == 0:
                    break
                hv = lst.pop()
            if hv is None:
                continue
            with open(self.get_local_obj_path(hv), "rb") as f:
                blob = f.read()
            for tr in self.translators:
                blob = tr.put(blob)
            # XXX HACK: retry with doubling delay until put() succeeds
            backoff = 0.5
            remote_path = self.get_remote_obj_path(hv)
            while not srv.put(remote_path, blob):
                time.sleep(backoff)
                backoff *= 2

    lock_dic = dict((key, threading.Lock()) for key in hashdic)

    if self.options is not None:
        nthreads = self.options.nthreads
    else:
        nthreads = 2
    for key in hashdic:
        for _ in range(nthreads):
            self.scheduler.submit(self.srvmap[key], False, _put_next,
                                  hashdic[key], lock_dic[key])

    self._join()

#XXX: it needs to return after one set is put, and continue on replication.
def bstore_sync(self, hashnames):
    """Upload every object once and report what still needs replicating.

    Each name in ``hashnames`` is queued for all services returned by
    ``mapping.get_mapping``.  Worker jobs upload a name through the first
    service that claims it; further copies are not uploaded here.

    Args:
        hashnames: sequence of object names present in the local store.

    Returns:
        dict mapping each key of ``self.srvmap`` to the list of names that
        were queued for that service but not uploaded to it by this call
        (suitable as input for ``bstore_sync_left``).
    """
    # An empty input must not crash on the debug line.
    first = hashnames[0] if len(hashnames) > 0 else None
    dbg.dbg("need to sync: %s..@%d" % (first, len(hashnames)))

    def _put_next(srv, hashdic, hashdic_left, allset, key, lock):
        # Worker: take names from this service's queue; upload those that
        # nobody has claimed yet, remember the others as leftovers.
        dbg.job("submitted to: %s" % srv)
        while True:
            with lock:
                if len(hashdic[key]) == 0 or len(allset) == 0:
                    break
                hv = hashdic[key].pop()
                if hv in allset:
                    allset.remove(hv)
                else:
                    hashdic_left[key].append(hv)
                    hv = None
            if hv is None:
                continue
            with open(self.get_local_obj_path(hv), "rb") as f:
                blob = f.read()
            for tr in self.translators:
                blob = tr.put(blob)
            # XXX HACK: retry with doubling delay until put() succeeds
            backoff = 0.5
            remote_path = self.get_remote_obj_path(hv)
            while not srv.put(remote_path, blob):
                dbg.dbg("backoff %s" % srv)
                time.sleep(backoff)
                backoff *= 2

    nthreads = self.options.nthreads if self.options is not None else 2
    hashdic = {}
    hashdic_left = {}
    allset = set()
    lock = threading.Lock()
    for srv in self.srvmap:
        hashdic[srv] = []
        hashdic_left[srv] = []

    for hashname in hashnames:
        allset.add(hashname)
        for target in self.mapping.get_mapping(hashname):
            hashdic[target].append(hashname)

    for srv in hashdic:
        for _ in range(nthreads):
            self.scheduler.submit(self.srvmap[srv], False, _put_next,
                                  hashdic, hashdic_left, allset, srv, lock)

    self._join()

    # Workers stop as soon as every name has been claimed once, which can
    # leave names sitting in a service's queue.  Those copies were not
    # uploaded either, so they belong to the leftovers as well; otherwise
    # they would never be replicated.
    for srv in hashdic:
        if hashdic[srv]:
            hashdic_left[srv].extend(hashdic[srv])
            del hashdic[srv][:]
    return hashdic_left

# iterate bstore
def bstore_iter(self):
    """Yield the name of every file found below the local objects dir."""
    for _root, _dirs, files in os.walk(self.path_objs):
        for name in files:
            yield name

def bstore_iter_remote(self, srv):
    """Yield each entry ``srv`` lists in the remote objects directory.

    ``srv`` must be one of ``self.services`` (checked with an assert).
    """
    assert srv in self.services
    # NOTE. at some point, we need cascaded directory hierarchy
    for obj in srv.listdir(self.get_remote_obj_path()):
        yield obj

#XXX.
update only changed files (SY) def restore_from_master(self): root = self.get_root_blob() dbg.dbg("restore") for name, blob in root.walk(): pn = os.path.join(self.path_root, name) if blob.thv == "F": content = blob.read() util.write_file(pn, content.getvalue()) content.close() if blob.thv == "m": content = blob.read() util.write_file(pn, content) elif blob.thv == "D" or blob.thv == "M": try: os.mkdir(pn) except: pass return True def propose_value(self, prev, newvalue): from paxos import Proposer self.proposer = Proposer(self.clientid, self.services, self.get_remote_path("pPaxos/"+prev)) return self.proposer.propose(newvalue) # need to truncate if history is too long. def get_history(self, is_master=False): pn = self.path_master_history if is_master else self.path_head_history content = util.read_file(pn).strip() if content: history = content.split("\n") history.reverse() else: history = [] return history def get_common_ancestor(self, head_history, master_history, known_common_history=None): # change to use known_common_history for head in head_history: if(head in master_history): return head return None def try_merge(self, head_history, master_history): # this need to be fixed. 
dbg.dbg("Trying to merge") # we may need to cache the last branched point common = self.get_common_ancestor(head_history, master_history) dbg.dbg("%s %s %s", head_history[0], master_history[0], common) common = self.blobstore.get_blob(common, "D") head = self.get_root_blob() master = self.blobstore.get_blob(master_history[0], "D") added1 = head.diff(common) added2 = master.diff(common) def intersect(a, b): return list(set(a) & set(b)) if(len(intersect(added1.keys(), added2.keys())) != 0): dbg.err("both modified--we need to handle it") return False for i in added2.keys(): path = os.path.join(self.path_root, i) dirblob = self.blobstore.load_dir(os.path.dirname(path), dirty=True) dirblob.add(os.path.basename(path), added2[i], dirty=False) # HACK, need to go through all the non-overlapped history. self.append_history(master.hv) head.store() self.append_history(head.hv) # HACK, need to be changed newblobs = self.blobstore.get_added_blobs() # push new blobs remotely self.bstore_sync(newblobs) self._join() return True def get_uptodate_master(self, includeself=True, srv=None): # copy all the heads. --- it should have version number. or something to compare against each other. 
if srv == None: srv = self.services[0] prev_clients = filter(lambda x:x.startswith("prev_"), srv.listdir(self.get_remote_path(""))) pointers = set() for prev in prev_clients: if not includeself or not prev.endswith(self.clientid): with open(self.get_path(prev), "w") as f: pointer = srv.get(self.get_remote_path(prev)) pointers.add(pointer) f.write(pointer) if includeself: pointers.add(self.get_prev_value()) return max(pointers, key=lambda x:int(x.split(".")[2])) def check_master_uptodate(self): srv = self.services[0] remote_master = srv.get(self.get_remote_path("master")) with open(self.path_master) as f: master_head = f.read().strip() if(master_head != remote_master): return False return True def cmd_poll(self): srv = self.services[0] srv.poll(self.namespace) # # end-user's interfaces (starting with cmd_ prefix) # NOTE. explicitly return True/False to indicate status of 'cmd' # def cmd_share(self, target_email): if not self.check_sanity(): dbg.err("this is not metasync repo") return False for srv in self.services: srv.share(self.namespace, target_email) def cmd_diff(self): # work only for 1-level directory # need to add diff for file if not self.check_sanity(): dbg.err("this is not metasync repo") return False root = self.get_root_blob() added = [] removed = [] files = os.listdir(".") for f in files: if(f == ".metasync"): continue if("/"+f not in root.files): added.append(f) for f in root.files: if(f[1:] not in files): removed.append(f[1:]) for f in added: print("+++ %s" % f) for f in removed: print("--- %s" % f) def cmd_mv(self, src_pn, dst_pn): if not self.check_sanity(): dbg.err("it's not a metasync repo.") return False src_pn = os.path.abspath(src_pn) dst_pn = os.path.abspath(dst_pn) #TODO: check src_pn exists beg = time.time() try: dirname = os.path.dirname(src_pn) dirblob = self.blobstore.load_dir(dirname, False, dirty=True) if(dirblob is None): dbg.err("%s does not exist" % src_pn) return False except NotTrackedException as e: dbg.err(str(e)) return False 
fname = os.path.basename(src_pn) if(not fname in dirblob): dbg.err("%s does not exist" % pn) return False fblob = dirblob[fname] dirblob.rm(fname) dst_dirname = os.path.dirname(dst_pn) if(dirname != dst_dirname): dirblob = self.blobstore.load_dir(dirname, True, dirty=True) assert dirblob is not None dst_fname = os.path.basename(dst_pn) dirblob.add(dst_fname, fblob, dirty=False) root = self.get_root_blob() root.store() newblobs = self.blobstore.get_added_blobs() util.write_file(self.get_head(), root.hv) self.append_history(root.hv) end = time.time() dbg.time("local write: %f" % (end-beg)) # push new blobs remotely self.bstore_sync(newblobs) self._put_all(self.get_head(), self.get_remote_path(self.get_head_name())) end = time.time() dbg.time("remote write: %f" % (end-beg)) # move the file shutil.move(src_pn, dst_pn) self._join() return True def cmd_peek(self): root = self.get_root_blob() for i in root.walk(): print(i) # print("hash: %s" % root.hash_head) # print(root.dump_info()) # with open(self.path_master) as f: # master_head = f.read().strip() # with open(self.get_head()) as f: # head = f.read().strip() # print("head_history %s" % ",".join(self.get_history(head))) # print("master_history %s" % ",".join(self.get_history(master_head))) def cmd_fetch(self): if not self.check_sanity(): dbg.err("it's not a metasync repo.") return False # TODO: change it into comparing between masters self.bstore_download() self._join() return True def update_changed(self, head, master): def _file_create(blob, pn): if(blob.thv == "D" or blob.thv == "M"): util.mkdirs(pn) for i in blob.entries: _file_create(blob[i], os.path.join(pn, i)) elif(blob.thv == "F"): content = blob.read() util.write_file(pn, content.getvalue()) content.close() # touch metadata blob (for cmd_status) os.utime(os.path.join(self.path_objs, blob.hv), None) elif(blob.thv == "m"): content = blob.read() util.write_file(pn, content) # touch metadata blob (for cmd_status) os.utime(os.path.join(self.path_objs, blob.hv), 
None) else: assert False def _update(old_dirblob, new_dirblob, path): for fname in new_dirblob.entries: blob = new_dirblob[fname] if(fname not in old_dirblob): _file_create(blob, os.path.join(path, fname)) elif(blob.hv != old_dirblob[fname].hv): if(blob.thv == "D"): _update(old_dirblob[fname], blob, os.path.join(path, fname)) elif(blob.thv == "F"): _file_create(blob, os.path.join(path, fname)) else: print(blob.thv) assert False headblob = self.blobstore.get_blob(head, "D") masterblob = self.blobstore.get_blob(master, "D") _update(headblob, masterblob, self.path_root) def update_head_and_prev(self, master): with open(self.get_prev(), "w") as f: f.write(master) with open(self.get_head(), "w") as f: f.write(master) def cmd_update(self): master = self.get_uptodate_master() # already up-to-date prev = self.get_prev_value() if (master == prev): return True head = self.get_head_and_config() # XXX: need to check if non-checked in but modified files. if (head == prev): self.update_changed(head.split(".")[0], master.split(".")[0]) else: ### need to merge raise Exception('Merge required') self.update_head_and_prev(master) self.blobstore.rootblob = None dbg.info("update done %s" % time.ctime()) return True #XXX: Seungyeop is working on it. def cmd_clone(self, namespace, backend=None, encrypt_key=None): # if wrong target if self.check_sanity(): return False # reset all the path by including the namespace self.path_root = os.path.join(self.path_root, namespace) self.path_meta = os.path.join(self.path_root, META_DIR) self.path_conf = self.get_path("config") self.path_objs = self.get_path("objects") #self.path_head_history = self.get_path("head_history") if os.path.exists(self.path_root): dbg.err("%s already exists." 
% self.path_root) return False if backend is None: print "input one of the storage backends, (e.g., dropbox,google,box)" print " for testing, use disk@/path (e.g., disk@/tmp)" backend = raw_input("> ") srv = services.factory(backend) self.namespace = namespace # create repo directory os.mkdir(self.path_root) os.mkdir(self.path_meta) os.mkdir(self.path_objs) curmaster = self.get_uptodate_master(False, srv) sp = curmaster.split(".") master = sp[0] seed = sp[1] seed = srv.get(self.get_remote_path("configs/%s" % seed)) conf = util.loads_config(seed) # setup client specific info conf.set('core', 'clientid' , util.gen_uuid()) conf.set('core', 'encryptkey', _get_conf_encryptkey(encrypt_key)) with open(self.path_conf, "w") as fd: conf.write(fd) self._load() beg = time.time() self.bstore_download() self._join() with open(self.get_head(), "w") as f: f.write(curmaster) with open(self.get_prev(), "w") as f: f.write(curmaster) # send my head to remote self._put_all(self.get_head(), self.get_remote_path(self.get_head_name())) self._put_all(self.get_prev(), self.get_remote_path(self.get_prev_name())) self._join() if (master): ret = self.restore_from_master() end = time.time() dbg.dbg("clone: %ss" % (end-beg)) return True def cmd_init(self, namespace, backend=None, nreplicas=None, encrypt_key=None): # already initialized? if self.check_sanity(): dbg.err("already initialized %s (%s)" \ % (self.path_root, self.namespace)) return False os.mkdir(self.path_meta) os.mkdir(self.path_objs) # build config opts conf = util.new_config() # core: unique/permanent info about local machine (often called client) # NOTE. 
not sure if encryption_key should be in core, or unchangable conf.add_section('core') conf.set('core', 'namespace' , namespace) conf.set('core', 'clientid' , util.gen_uuid()) conf.set('core', 'encryptkey', _get_conf_encryptkey(encrypt_key)) # backend: info about sync service providers # XXX: Error handling conf.add_section('backend') try: services = _get_conf_services(backend) conf.set('backend', 'services' , services) conf.set('backend', 'nreplicas', _get_conf_nreplicas(nreplicas, len(services.split(",")))) except: pass # flush with open(self.path_conf, "w") as fd: conf.write(fd) try: self._load() except NameError: shutil.rmtree(self.path_meta) return False # put config into remote conf.remove_option('core','clientid') conf.remove_option('core','encryptkey') with io.BytesIO() as out: conf.write(out) val = out.getvalue() configname = util.sha1(val) self._put_all_content(val, self.get_remote_path("configs/%s" % configname[:6]), True) #temporary --- move this to pPaxos #self._put_all_content(configname[:6], self.get_remote_path("config"), True) # Format for master: headhash.config[:6].version prev_master = "." + configname[:6] + ".0" # do we need both? or shall we put them into a file together. 
with open(self.get_head(), "w") as f: f.write(prev_master) with open(self.get_prev(), "w") as f: f.write(prev_master) self._put_all_dir(self.get_remote_path("objects")) # change to put_content self._put_all(self.get_head() , self.get_remote_path(self.get_head_name())) self._put_all(self.get_prev() , self.get_remote_path(self.get_prev_name())) from paxos import Proposer self.proposer = Proposer(None, self.services, self.get_pPaxos_path(prev_master)) self._join() return True def get_pPaxos_path(self, path): return self.get_remote_path("pPaxos/" + path) def cmd_gc(self): if not self.check_sanity(): dbg.err("this is not a metasync repo") return False def _find_all_blobs(blob, tracked): # we may need to move this to blobstore if(blob.hv in tracked): return tracked.add(blob.hv) if(blob.thv == "C"): return for name, childblob in blob.entries.iteritems(): _find_all_blobs(childblob, tracked) # check head head = self.get_head_value() tracked = set([]) if(head is not None and len(head)>0): blob = self.blobstore.get_blob(head, "D") _find_all_blobs(blob, tracked) # check master with open(self.path_master) as f: master_head = f.read().strip() if(len(master_head) > 0): blob = self.blobstore.get_blob(master_head, "D") _find_all_blobs(blob, tracked) allblobs = set(self.blobstore.list()) # remove following blobs_to_remove = allblobs - tracked def __rm(srv, remote_path): dbg.job("submitted to: %s (%s)" % (srv, remote_path)) srv.rm(remote_path) for hashname in blobs_to_remove: for i in self.mapping.get_mapping(hashname): self.scheduler.submit(self.srvmap[i], True, __rm, self.get_remote_obj_path(hashname)) os.unlink(self.get_local_obj_path(hashname)) return True def cmd_rm(self, pn): if not self.check_sanity(): dbg.err("this is not a metasync repo") return False #TODO: check if the file exists beg = time.time() try: dirname = os.path.dirname(pn) dirblob = self.blobstore.load_dir(dirname, False) if(dirblob is None): dbg.err("%s does not exist" % pn) return False except 
NotTrackedException as e: dbg.err(str(e)) return False fname = os.path.basename(pn) if(not fname in dirblob): dbg.err("%s does not exist" % pn) return False dirblob.rm(fname) root = self.get_root_blob() root.store() newblobs = self.blobstore.get_added_blobs() # we may need to include pointer for previous version. util.write_file(self.get_head(), root.hv) self.append_history(root.hv) end = time.time() dbg.time("local write: %f" % (end-beg)) # push new blobs remotely self.bstore_sync(newblobs) self._put_all(self.get_head(), self.get_remote_path(self.get_head_name())) end = time.time() dbg.time("remote write: %f" % (end-beg)) self._join() # drop local copy # TODO: rm only tracked files if removing file. try: os.unlink(pn) except: dbg.err("failed to rm %s" % pn) return False return True def append_history(self, hv): util.append_file(self.path_head_history, hv+"\n") def cmd_checkin(self, paths, unit=BLOB_UNIT, upload_only_first=False): if not self.check_sanity(): dbg.err("this is not a metasync repo") return False if type(paths) != types.ListType: paths = [paths] for pn in paths: if not os.path.exists(pn): dbg.err("File %s doesn't exits." % pn) return False beg = time.time() #XXX: considering mtime, check hash of chunks? 
changed = False for path in paths: if(not os.path.isfile(path)): changed = True for root, dirs, files in os.walk(path): fsizesum = 0 for fname in files: fsizesum += os.stat(os.path.join(root,fname)).st_size print(root + " " + str(fsizesum)) if(fsizesum < unit): dirblob = self.blobstore.load_dir(root, dirty=True, merge=True) for fname in files: dirblob.add_file(fname, os.path.join(root, fname)) dirblob.done_adding() else: dirblob = self.blobstore.load_dir(root, dirty=True) for fname in files: fileblob = self.blobstore.load_file(os.path.join(root, fname), unit) if(fname in dirblob and dirblob[fname].hv == fileblob.hv): continue dirblob.add(fname, fileblob) else: fileblob = self.blobstore.load_file(path, unit) dirname = os.path.dirname(path) if(dirname == ""): dirname = "." dirblob = self.blobstore.load_dir(dirname, dirty=True) fname = os.path.basename(path) if(fname in dirblob and dirblob[fname].hv == fileblob.hv): continue changed = True dirblob.add(fname, fileblob) if(not changed): return True root = self.get_root_blob() root.store() newblobs = self.blobstore.get_added_blobs() util.write_file(self.get_head(), "%s.%s.%d" % (root.hv, self.get_config_hash(), self.get_next_version())) end = time.time() dbg.time("local write: %f" % (end-beg)) # push new blobs remotely leftover = self.bstore_sync(newblobs) self._update_all(self.get_head(), self.get_remote_path(self.get_head_name())) self._join() end = time.time() dbg.time("remote write for R1: %f" % (end-beg)) if(not upload_only_first): self.bstore_sync_left(leftover) end = time.time() dbg.time("remote write for left: %f" % (end-beg)) return [] else: return leftover def cmd_push(self): prev = self.get_prev_value() newvalue = self.get_head_and_config() val = self.propose_value(prev, newvalue) if(val != newvalue): dbg.err("You should fetch first") return False # with open(self.path_master) as f: # master_head = f.read().strip() # with open(self.get_head()) as f: # head = f.read().strip() # if(len(master_head) > 0): # 
head_history = self.get_history() # if(not master_head in head_history): # dbg.err("You should update first") # self.unlock_master() # return False # check master is ancestor of the head shutil.copyfile(self.get_head(), self.get_prev()) self._update_all(self.get_prev(), self.get_remote_path(self.get_prev_name())) from paxos import Proposer self.proposer = Proposer(None, self.services, self.get_pPaxos_path(newvalue)) self._join() return True def cmd_status(self, unit=BLOB_UNIT): def simple_walk(folder): # simple_walk will skip dipping into the folder # that are not tracked in the repo untracked = [] changed = [] for f in os.listdir(folder): if f == META_DIR: continue basename = os.path.basename(folder) if basename == '.' or basename == '': relpath = f else: relpath = os.path.join(folder, f) if relpath in tracked: if os.path.isdir(f): _untracked, _changed = simple_walk(relpath) untracked.extend(_untracked) changed.extend(_changed) else: fblob = tracked[relpath] # compare the file modified time and its metadata blob modified time curr_mtime = os.path.getmtime(relpath) last_mtime = os.path.getmtime(os.path.join(self.path_objs, fblob.hv)) if curr_mtime > last_mtime: # only load file when the file modified time is greater than metadata modified time fblob._load() flag = False # compare chunk hash for (offset, chunk) in util.each_chunk2(relpath, unit): if util.sha1(chunk) != fblob.entries[offset].hv: flag = True break if flag: changed.append(relpath) else: if os.path.isdir(relpath): relpath = os.path.join(relpath, '') untracked.append(relpath) return untracked, changed if not self.check_sanity(): dbg.err("this is not a metasync repo") return False # switch to metasync repo root folder os.chdir(self.path_root) # compare the head and master history head_history = self.get_history() master_history = self.get_history(True) head_diverge = 0 for head in head_history: if (head in master_history): break head_diverge += 1 if head_diverge == len(head_history): master_diverge = 
len(master_history) else: master_diverge = master_history.index(head_history[head_diverge]) if head_diverge == 0 and master_diverge == 0: print "\nYour branch is up-to-date with master." elif head_diverge == 0: print "\nYour branch is behind master by %d commit(s)." % master_diverge elif master_diverge == 0: print "\nYour branch is ahead of master by %d commit(s)." % head_diverge else: print "\nYour branch and master have diverged," print "and have %d and %d different commits each, respectively" % (head_diverge, master_diverge) root = self.get_root_blob() tracked = {} for (path, blob) in root.walk(): tracked[path] = blob untracked, changed = simple_walk('.') if changed: print("\nChanges not checked in:") for f in changed: print("\033[31m\tmodified: %s\033[m" % f) if untracked: print("\nUntracked files:") for f in untracked: print("\033[31m\t%s\033[m" % f) return True
def cmd_init(self, namespace, backend=None, nreplicas=None, encrypt_key=None):
    """Create a new repo locally and publish its initial state remotely.

    Steps, in order: create the meta and object directories, write the
    local config, load it via ``_load``, upload a copy of the config with
    the client-specific options removed, seed the head/prev pointer files
    and push them (plus the remote "objects" directory) to every service.

    Args:
        namespace: stored as ``core.namespace`` in the config.
        backend: forwarded to ``_get_conf_services``.
        nreplicas: forwarded to ``_get_conf_nreplicas`` together with the
            number of comma-separated entries in the services string.
        encrypt_key: forwarded to ``_get_conf_encryptkey``.

    Returns:
        True on success. False if the repo already passes
        ``check_sanity`` or if ``_load`` raises NameError (in which case
        the freshly created meta directory is removed).
    """
    # already initialized?
    if self.check_sanity():
        dbg.err("already initialized %s (%s)"
                % (self.path_root, self.namespace))
        return False

    os.mkdir(self.path_meta)
    os.mkdir(self.path_objs)

    # build config opts
    conf = util.new_config()

    # core: unique/permanent info about local machine (often called client)
    #   NOTE. not sure if encryption_key should be in core, or unchangeable
    conf.add_section('core')
    conf.set('core', 'namespace', namespace)
    conf.set('core', 'clientid', util.gen_uuid())
    conf.set('core', 'encryptkey', _get_conf_encryptkey(encrypt_key))

    # backend: info about sync service providers
    conf.add_section('backend')
    try:
        # named service_spec so the module-level `services` is not shadowed
        service_spec = _get_conf_services(backend)
        conf.set('backend', 'services', service_spec)
        conf.set('backend', 'nreplicas',
                 _get_conf_nreplicas(nreplicas, len(service_spec.split(","))))
    except Exception as e:
        # Still best-effort (the flow continues as before), but only
        # ordinary errors are swallowed now: KeyboardInterrupt/SystemExit
        # propagate, and the cause is reported instead of being hidden.
        # XXX: Error handling
        dbg.err("failed to configure backend: %s" % e)

    # flush
    with open(self.path_conf, "w") as fd:
        conf.write(fd)

    try:
        self._load()
    except NameError:
        shutil.rmtree(self.path_meta)
        return False

    # put config into remote; client-specific options are dropped first
    conf.remove_option('core', 'clientid')
    conf.remove_option('core', 'encryptkey')

    with io.BytesIO() as out:
        conf.write(out)
        val = out.getvalue()

    configname = util.sha1(val)
    self._put_all_content(
        val, self.get_remote_path("configs/%s" % configname[:6]), True)

    #temporary --- move this to pPaxos
    #self._put_all_content(configname[:6], self.get_remote_path("config"), True)

    # Format for master: headhash.config[:6].version
    # (empty head hash and version 0 for a fresh repo)
    prev_master = "." + configname[:6] + ".0"

    # do we need both? or shall we put them into a file together.
    with open(self.get_head(), "w") as f:
        f.write(prev_master)
    with open(self.get_prev(), "w") as f:
        f.write(prev_master)

    self._put_all_dir(self.get_remote_path("objects"))
    # change to put_content
    self._put_all(self.get_head(), self.get_remote_path(self.get_head_name()))
    self._put_all(self.get_prev(), self.get_remote_path(self.get_prev_name()))

    from paxos import Proposer
    self.proposer = Proposer(None, self.services,
                             self.get_pPaxos_path(prev_master))

    # wait for every scheduled upload before reporting success
    self._join()

    return True
class MetaSync: def __init__(self, root, opts=None): # # repo/.metasync/ # ^ ^ # | +-- meta # +-- root # useful path info self.path_root = self._find_root(root) self.path_meta = os.path.join(self.path_root, META_DIR) self.path_conf = self.get_path("config") self.path_objs = self.get_path("objects") self.path_master = self.get_path("master") self.path_head_history = self.get_path("head_history") self.options = opts # local blob store self.blobstore = BlobStore2(self) #BlobStore(self.path_objs) # load on demand self.config = None self.srvmap = {} self.scheduler = None self.translators = [] self.mapping = None # post init self._load() def _find_root(self, curpath): # find repo curpath = os.path.abspath(curpath) orgpath = curpath auth_dir = os.path.join(os.path.expanduser("~"), ".metasync") while True: path = os.path.join(curpath, META_DIR) if (path != auth_dir and os.path.exists(os.path.join(curpath, META_DIR))): return curpath sp = os.path.split(curpath) if (sp[1] == ""): break curpath = sp[0] return orgpath @property def services(self): return self.srvmap.values() # load member variables from config def _load(self): if not self.check_sanity(): return if (not os.path.exists(AUTH_DIR)): os.mkdir(AUTH_DIR) # load config self.config = util.load_config(self.path_conf) self.namespace = self.config.get("core", "namespace") self.clientid = self.config.get("core", "clientid") # load services from config self.srvmap = {} for tok in self.config.get("backend", "services").split(","): srv = services.factory(tok) self.srvmap[srv.sid()] = srv self.nreplicas = int(self.config.get("backend", "nreplicas")) nthreads = self.options.nthreads if self.options is not None else 2 self.scheduler = Scheduler(self.services, (nthreads + 1) * len(self.srvmap)) # load translator pipe if self.is_encypted(): self.translators.append(translators.TrEncrypt(self)) # TODO. 
for integrity option # if self.is_signed(): # self.translators.append(TrSigned(self)) beg = time.time() if (os.path.exists(self.get_path("mapping.pcl"))): with open(self.get_path("mapping.pcl")) as f: self.mapping = pickle.load(f) else: mapconfig = [] for srv in self.services: mapconfig.append((srv.sid(), srv.info_storage() / GB)) hspacesum = sum(map(lambda x: x[1], mapconfig)) hspace = max(hspacesum + 1, 1024) self.mapping = DetMap2(mapconfig, hspace=hspace, replica=self.nreplicas) self.mapping.pack() with open(self.get_path("mapping.pcl"), "w") as f: pickle.dump(self.mapping, f) end = time.time() dbg.time("mapping init %s" % (end - beg)) dbg.dbg("head: %s", self.get_head_name()) def cmd_reconfigure(self, backends, replica): srvmap = {} for tok in backends.split(","): srv = services.factory(tok) srvmap[srv.sid()] = srv lst_services = srvmap.values() mapconfig = [] lock_dic = {} for srv in lst_services: mapconfig.append((srv.sid(), srv.info_storage() / GB)) for srv in srvmap: lock_dic[srv] = threading.Lock() if srv not in self.srvmap: srvmap[srv].putdir(self.get_remote_path("objects")) for srv in self.srvmap: if srv not in lock_dic: lock_dic[srv] = threading.Lock() beg = time.time() self.mapping.reconfig(mapconfig, eval(replica)) end = time.time() dbg.info("remap: %.3fs" % (end - beg)) beg = time.time() lst_objs = self.blobstore.list() added, removed = self.mapping.get_remapping(lst_objs) nthreads = self.options.nthreads if self.options is not None else 2 #REFACTOR def __put_next(srv, lst, lock): dbg.job("submitted to: %s" % srv) while True: lock.acquire() if (len(lst) == 0): lock.release() break next = lst.pop() lock.release() if next is not None: with open(self.get_local_obj_path(next), "rb") as f: blob = f.read() for tr in self.translators: blob = tr.put(blob) # XXX HACK backoff = 0.5 remote_path = self.get_remote_obj_path(next) while not srv.put(remote_path, blob): time.sleep(backoff) backoff *= 2 def __rm_next(srv, lst, lock): dbg.job("submitted to: %s" % srv) 
while True: lock.acquire() if (len(lst) == 0): lock.release() break next = lst.pop() lock.release() if next is not None: remote_path = self.get_remote_obj_path(next) srv.rm(remote_path) cnt_added = 0 for srv in added: if (len(added[srv]) == 0): continue cnt_added += len(added[srv]) for i in range(nthreads): self.scheduler.submit(srvmap[srv], False, __put_next, added[srv], lock_dic[srv]) self._join() end = time.time() dbg.info("remap put: %.3fs" % (end - beg)) beg = time.time() cnt_removed = 0 for srv in removed: if (len(removed[srv]) == 0): continue cnt_removed += len(removed[srv]) for i in range(nthreads): self.scheduler.submit(self.srvmap[srv], False, __rm_next, removed[srv], lock_dic[srv]) self._join() end = time.time() dbg.info("remap rm: %.3fs" % (end - beg)) dbg.info("added %d, removed %d" % (cnt_added, cnt_removed)) # config-related parser def is_encypted(self): key = self.config.get('core', 'encryptkey').strip() return key != "" # handling dir/path names def get_path(self, path): return os.path.join(self.path_meta, path) def get_head(self): return self.get_path(self.get_head_name()) def get_head_name(self): return "head_%s" % self.get_client_id() def get_head_value(self): with open(self.get_head()) as f: return f.read().strip().split(".")[0] return None def get_head_and_config(self): with open(self.get_head()) as f: return f.read().strip() return None def get_prev(self): return self.get_path(self.get_prev_name()) def get_prev_name(self): return "prev_%s" % self.get_client_id() def get_prev_value(self): with open(self.get_prev()) as f: return f.read().strip() return None def get_next_version(self): with open(self.get_prev()) as f: return int(f.read().strip().split(".")[2]) + 1 return None #XXX: Cache? 
def get_config_hash(self): with open(self.get_head()) as f: return f.read().strip().split(".")[1] return None def get_client_id(self): return self.clientid def get_relative_path(self, path): return os.path.relpath(os.path.abspath(path), self.path_root) def get_local_path(self, *path): return os.path.join(self.path_root, *[p.strip("/") for p in path]) def get_local_obj_path(self, hv): return os.path.join(self.path_objs, hv) def get_remote_path(self, *path): #return os.path.join(self.namespace, *path).rstrip("/") return "/".join([self.namespace] + list(path)).rstrip("/").rstrip("\\") def get_remote_obj_path(self, *hashes): return self.get_remote_path("objects", *hashes) def get_root_blob(self): return self.blobstore.get_root_blob() # check basic sanity of repo's meta info def check_sanity(self, whynot=False): def __err(why): if whynot: print >> sys.stderr, why return False if not os.path.exists(self.path_meta): return __err("Can't find the root of repo (%s)" % self.path_meta) if not os.path.exists(self.path_conf): return __err("Can't find config (%s)" % self.path_conf) if not os.path.exists(self.path_objs): return __err("Can't find objects store (%s)" % self.path_objs) return True # schedule-related def _put_all_content(self, content, remote_path, serial=False): def __put(srv): #dbg.job("submitted to: %s" % srv) srv.put(remote_path, content) # submit jobs for srv in self.services: self.scheduler.submit(srv, serial, __put) def _put_all_dir(self, remote_path): # XXX. handle errs def __putdir(srv): srv.putdir(remote_path) # submit jobs for srv in self.services: self.scheduler.submit(srv, True, __putdir) def _put_all(self, path, remote_path): # XXX. handle errs def __put(srv): with open(path, "rb") as f: srv.put(remote_path, f.read()) # submit jobs for srv in self.services: self.scheduler.submit(srv, True, __put) def _update_all(self, path, remote_path): # XXX. 
handle errs def __update(srv): #dbg.job("submitted to: %s" % srv) with open(path, "rb") as f: #print 'start to put' srv.update(remote_path, f.read()) #print 'put ends' # submit jobs for srv in self.services: self.scheduler.submit(srv, True, __update) def _join(self): self.scheduler.join() def _get(self, srv, path, remote_path): def __get(srv, path, remote_path): dbg.job("submitted to: %s (%s)" % (srv, path)) with open(path, "wb") as f: blob = srv.get(remote_path) if (blob is None): time.sleep(1) blob = srv.get(remote_path) for tr in reversed(self.translators): blob = tr.get(blob) f.write(blob) self.scheduler.submit(srv, False, __get, path, remote_path) # bstore-related def bstore_download(self): # TODO, handle when R > 1 lst = self.blobstore.list() #dbg.dbg("lst files:%s" % lst) lock = threading.Lock() def __get_next(srv, hash_dic, lock, allset, srvname): if (len(hash_dic[srvname]) == 0): return while True: lock.acquire() try: next = hash_dic[srvname].pop() l = len(hash_dic[srvname]) if (l % 10 == 0): dbg.dbg("%s left %d" % (srvname, l)) if (next not in allset): allset.add(next) else: next = None except: lock.release() break lock.release() if (next is not None): remote_path = self.get_remote_obj_path(next) path = os.path.join(self.path_objs, next) with open(path, "wb") as f: backoff = 0.5 while True: blob = srv.get(remote_path) if (blob is not None): break dbg.dbg("back off %s" % srvname) time.sleep(backoff) backoff *= 2 for tr in reversed(self.translators): blob = tr.get(blob) f.write(blob) hash_dic = {} allset = set([]) for srv in self.services: hash_dic[str(srv)] = [] srvlist = srv.listdir(self.get_remote_obj_path()) backoff = 1 while srvlist is None: dbg.dbg("back off - listdir %s" % str(srv)) time.sleep(backoff) srvlist = srv.listdir(self.get_remote_obj_path()) for hashname in srvlist: if (hashname in lst): #dbg.dbg("%s is already in bstore" % hashname) continue hash_dic[str(srv)].append(hashname) nthreads = self.options.nthreads if self.options is not None 
else 2 for srv in self.services: dbg.dbg("%s:%d dn" % (str(srv), len(hash_dic[str(srv)]))) ##HACK for i in range(nthreads): self.scheduler.submit(srv, False, __get_next, hash_dic, lock, allset, str(srv)) def bstore_sync_left(self, hashdic): cnt = 0 for i in hashdic: cnt += len(hashdic[i]) if (cnt == 0): return def __put_next(srv, lst, lock): dbg.job("submitted to: %s" % srv) while True: lock.acquire() if (len(lst) == 0): lock.release() break next = lst.pop() lock.release() if next is not None: with open(self.get_local_obj_path(next), "rb") as f: blob = f.read() for tr in self.translators: blob = tr.put(blob) # XXX HACK backoff = 0.5 remote_path = self.get_remote_obj_path(next) while not srv.put(remote_path, blob): time.sleep(backoff) backoff *= 2 lock_dic = {} for i in hashdic: lock_dic[i] = threading.Lock() nthreads = self.options.nthreads if self.options is not None else 2 for srv in hashdic: for i in range(nthreads): self.scheduler.submit(self.srvmap[srv], False, __put_next, hashdic[srv], lock_dic[srv]) self._join() #XXX: it needs to return after one set is put, and continue on replication. 
def bstore_sync(self, hashnames):
    """Upload each named blob to one mapped service; return what is left.

    Every hash is queued for each service id returned by
    ``self.mapping.get_mapping(hashname)``. Worker jobs (``nthreads`` per
    service) pop from their service's queue; the first worker to pop a
    given hash uploads it, later pops of the same hash are deferred.
    Workers stop once every hash has been claimed.

    Args:
        hashnames: list of blob hash names present in the local object
            store.

    Returns:
        dict keyed by the keys of ``self.srvmap``; each value lists the
        hash names that are mapped to that service but were not uploaded
        to it by this call (suitable for ``bstore_sync_left``).
    """
    if not hashnames:
        # Nothing to upload; the original indexed hashnames[0] for the
        # debug message and raised IndexError on an empty list.
        return dict((srv, []) for srv in self.srvmap)

    dbg.dbg("need to sync: %s..@%d" % (hashnames[0], len(hashnames)))

    def _put_next(srv, hashdic, hashdic_left, allset, key, lock):
        dbg.job("submitted to: %s" % srv)
        while True:
            # `with` guarantees the lock is released on every exit path
            with lock:
                if len(hashdic[key]) == 0 or len(allset) == 0:
                    break
                hv = hashdic[key].pop()
                if hv in allset:
                    # first claim of this hash: this worker uploads it
                    allset.remove(hv)
                else:
                    # already claimed elsewhere: defer the replica
                    hashdic_left[key].append(hv)
                    hv = None
            if hv is None:
                continue

            with open(self.get_local_obj_path(hv), "rb") as f:
                blob = f.read()
            for tr in self.translators:
                blob = tr.put(blob)

            # XXX HACK: retry with exponential backoff until put succeeds
            backoff = 0.5
            remote_path = self.get_remote_obj_path(hv)
            while not srv.put(remote_path, blob):
                dbg.dbg("backoff %s" % srv)
                time.sleep(backoff)
                backoff *= 2

    nthreads = self.options.nthreads if self.options is not None else 2
    hashdic = {}
    hashdic_left = {}
    allset = set()
    lock = threading.Lock()

    for srv in self.srvmap:
        hashdic[srv] = []
        hashdic_left[srv] = []

    for hashname in hashnames:
        allset.add(hashname)
        for i in self.mapping.get_mapping(hashname):
            hashdic[i].append(hashname)

    for srv in hashdic:
        for _ in range(nthreads):
            self.scheduler.submit(self.srvmap[srv], False, _put_next,
                                  hashdic, hashdic_left, allset, srv, lock)

    self._join()

    # Workers quit as soon as every hash has one copy, so their queues may
    # still hold replicas that were never popped. Those are pending work
    # too; without this they would be missing from the returned leftover.
    for srv in hashdic:
        hashdic_left[srv].extend(hashdic[srv])
        del hashdic[srv][:]

    return hashdic_left


# iterate bstore
def bstore_iter(self):
    """Yield the name of every file found under ``self.path_objs``."""
    for _root, _dirs, files in os.walk(self.path_objs):
        for name in files:
            yield name


def bstore_iter_remote(self, srv):
    """Yield each entry ``srv`` lists under the remote objects path.

    ``srv`` must be one of ``self.services``; this is checked with an
    assert when iteration starts.
    """
    assert srv in self.services

    # NOTE. at some point, we need cascaded directory hierarchy
    for obj in srv.listdir(self.get_remote_obj_path()):
        yield obj

#XXX.
update only changed files (SY) def restore_from_master(self): root = self.get_root_blob() dbg.dbg("restore") for name, blob in root.walk(): pn = os.path.join(self.path_root, name) if blob.thv == "F": content = blob.read() util.write_file(pn, content.getvalue()) content.close() if blob.thv == "m": content = blob.read() util.write_file(pn, content) elif blob.thv == "D" or blob.thv == "M": try: os.mkdir(pn) except: pass return True def propose_value(self, prev, newvalue): from paxos import Proposer self.proposer = Proposer(self.clientid, self.services, self.get_remote_path("pPaxos/" + prev)) return self.proposer.propose(newvalue) # need to truncate if history is too long. def get_history(self, is_master=False): pn = self.path_master_history if is_master else self.path_head_history content = util.read_file(pn).strip() if content: history = content.split("\n") history.reverse() else: history = [] return history def get_common_ancestor(self, head_history, master_history, known_common_history=None): # change to use known_common_history for head in head_history: if (head in master_history): return head return None def try_merge(self, head_history, master_history): # this need to be fixed. 
dbg.dbg("Trying to merge") # we may need to cache the last branched point common = self.get_common_ancestor(head_history, master_history) dbg.dbg("%s %s %s", head_history[0], master_history[0], common) common = self.blobstore.get_blob(common, "D") head = self.get_root_blob() master = self.blobstore.get_blob(master_history[0], "D") added1 = head.diff(common) added2 = master.diff(common) def intersect(a, b): return list(set(a) & set(b)) if (len(intersect(added1.keys(), added2.keys())) != 0): dbg.err("both modified--we need to handle it") return False for i in added2.keys(): path = os.path.join(self.path_root, i) dirblob = self.blobstore.load_dir(os.path.dirname(path), dirty=True) dirblob.add(os.path.basename(path), added2[i], dirty=False) # HACK, need to go through all the non-overlapped history. self.append_history(master.hv) head.store() self.append_history(head.hv) # HACK, need to be changed newblobs = self.blobstore.get_added_blobs() # push new blobs remotely self.bstore_sync(newblobs) self._join() return True def get_uptodate_master(self, includeself=True, srv=None): # copy all the heads. --- it should have version number. or something to compare against each other. 
if srv == None: srv = self.services[0] prev_clients = filter(lambda x: x.startswith("prev_"), srv.listdir(self.get_remote_path(""))) pointers = set() for prev in prev_clients: if not includeself or not prev.endswith(self.clientid): with open(self.get_path(prev), "w") as f: pointer = srv.get(self.get_remote_path(prev)) pointers.add(pointer) f.write(pointer) if includeself: pointers.add(self.get_prev_value()) return max(pointers, key=lambda x: int(x.split(".")[2])) def check_master_uptodate(self): srv = self.services[0] remote_master = srv.get(self.get_remote_path("master")) with open(self.path_master) as f: master_head = f.read().strip() if (master_head != remote_master): return False return True def cmd_poll(self): srv = self.services[0] srv.poll(self.namespace) # # end-user's interfaces (starting with cmd_ prefix) # NOTE. explicitly return True/False to indicate status of 'cmd' # def cmd_share(self, target_email): if not self.check_sanity(): dbg.err("this is not metasync repo") return False for srv in self.services: srv.share(self.namespace, target_email) def cmd_diff(self): # work only for 1-level directory # need to add diff for file if not self.check_sanity(): dbg.err("this is not metasync repo") return False root = self.get_root_blob() added = [] removed = [] files = os.listdir(".") for f in files: if (f == ".metasync"): continue if ("/" + f not in root.files): added.append(f) for f in root.files: if (f[1:] not in files): removed.append(f[1:]) for f in added: print("+++ %s" % f) for f in removed: print("--- %s" % f) def cmd_mv(self, src_pn, dst_pn): if not self.check_sanity(): dbg.err("it's not a metasync repo.") return False src_pn = os.path.abspath(src_pn) dst_pn = os.path.abspath(dst_pn) #TODO: check src_pn exists beg = time.time() try: dirname = os.path.dirname(src_pn) dirblob = self.blobstore.load_dir(dirname, False, dirty=True) if (dirblob is None): dbg.err("%s does not exist" % src_pn) return False except NotTrackedException as e: dbg.err(str(e)) return 
False fname = os.path.basename(src_pn) if (not fname in dirblob): dbg.err("%s does not exist" % pn) return False fblob = dirblob[fname] dirblob.rm(fname) dst_dirname = os.path.dirname(dst_pn) if (dirname != dst_dirname): dirblob = self.blobstore.load_dir(dirname, True, dirty=True) assert dirblob is not None dst_fname = os.path.basename(dst_pn) dirblob.add(dst_fname, fblob, dirty=False) root = self.get_root_blob() root.store() newblobs = self.blobstore.get_added_blobs() util.write_file(self.get_head(), root.hv) self.append_history(root.hv) end = time.time() dbg.time("local write: %f" % (end - beg)) # push new blobs remotely self.bstore_sync(newblobs) self._put_all(self.get_head(), self.get_remote_path(self.get_head_name())) end = time.time() dbg.time("remote write: %f" % (end - beg)) # move the file shutil.move(src_pn, dst_pn) self._join() return True def cmd_peek(self): root = self.get_root_blob() for i in root.walk(): print(i) # print("hash: %s" % root.hash_head) # print(root.dump_info()) # with open(self.path_master) as f: # master_head = f.read().strip() # with open(self.get_head()) as f: # head = f.read().strip() # print("head_history %s" % ",".join(self.get_history(head))) # print("master_history %s" % ",".join(self.get_history(master_head))) def cmd_fetch(self): if not self.check_sanity(): dbg.err("it's not a metasync repo.") return False # TODO: change it into comparing between masters self.bstore_download() self._join() return True def update_changed(self, head, master): def _file_create(blob, pn): if (blob.thv == "D" or blob.thv == "M"): util.mkdirs(pn) for i in blob.entries: _file_create(blob[i], os.path.join(pn, i)) elif (blob.thv == "F"): content = blob.read() util.write_file(pn, content.getvalue()) content.close() # touch metadata blob (for cmd_status) os.utime(os.path.join(self.path_objs, blob.hv), None) elif (blob.thv == "m"): content = blob.read() util.write_file(pn, content) # touch metadata blob (for cmd_status) 
os.utime(os.path.join(self.path_objs, blob.hv), None) else: assert False def _update(old_dirblob, new_dirblob, path): for fname in new_dirblob.entries: blob = new_dirblob[fname] if (fname not in old_dirblob): _file_create(blob, os.path.join(path, fname)) elif (blob.hv != old_dirblob[fname].hv): if (blob.thv == "D"): _update(old_dirblob[fname], blob, os.path.join(path, fname)) elif (blob.thv == "F"): _file_create(blob, os.path.join(path, fname)) else: print(blob.thv) assert False # print('head: ', head) # print('master: ', master) headblob = self.blobstore.get_blob(head, "D") masterblob = self.blobstore.get_blob(master, "D") _update(headblob, masterblob, self.path_root) def update_head_and_prev(self, master): with open(self.get_prev(), "w") as f: f.write(master) with open(self.get_head(), "w") as f: f.write(master) def cmd_update(self): master = self.get_uptodate_master(False) # already up-to-date prev = self.get_prev_value() if (master == prev): self.update_head_and_prev(master) return True head = self.get_head_and_config() # XXX: need to check if non-checked in but modified files. if (head == prev): headstr = head.split(".")[0] if headstr == '': self.update_head_and_prev(master) return self.restore_from_master() masterstr = master.split(".")[0] self.update_changed(headstr, masterstr) else: ### need to merge raise Exception('Merge required') self.update_head_and_prev(master) self.blobstore.rootblob = None dbg.info("update done %s" % time.ctime()) return True #XXX: Seungyeop is working on it. def cmd_clone(self, namespace, backend=None, encrypt_key=None): # if wrong target if self.check_sanity(): return False # reset all the path by including the namespace self.path_root = os.path.join(self.path_root, namespace) self.path_meta = os.path.join(self.path_root, META_DIR) self.path_conf = self.get_path("config") self.path_objs = self.get_path("objects") #self.path_head_history = self.get_path("head_history") if os.path.exists(self.path_root): dbg.err("%s already exists." 
% self.path_root) return False if backend is None: print "input one of the storage backends, (e.g., dropbox,google,box)" print " for testing, use disk@/path (e.g., disk@/tmp)" backend = raw_input("> ") srv = services.factory(backend) self.namespace = namespace # create repo directory os.mkdir(self.path_root) os.mkdir(self.path_meta) os.mkdir(self.path_objs) curmaster = self.get_uptodate_master(False, srv) sp = curmaster.split(".") master = sp[0] seed = sp[1] seed = srv.get(self.get_remote_path("configs/%s" % seed)) conf = util.loads_config(seed) # setup client specific info conf.set('core', 'clientid', util.gen_uuid()) conf.set('core', 'encryptkey', _get_conf_encryptkey(encrypt_key)) with open(self.path_conf, "w") as fd: conf.write(fd) self._load() beg = time.time() self.bstore_download() self._join() with open(self.get_head(), "w") as f: f.write(curmaster) with open(self.get_prev(), "w") as f: f.write(curmaster) # send my head to remote self._put_all(self.get_head(), self.get_remote_path(self.get_head_name())) self._put_all(self.get_prev(), self.get_remote_path(self.get_prev_name())) self._join() if (master): ret = self.restore_from_master() end = time.time() dbg.dbg("clone: %ss" % (end - beg)) return True def cmd_init(self, namespace, backend=None, nreplicas=None, encrypt_key=None): # already initialized? if self.check_sanity(): dbg.err("already initialized %s (%s)" \ % (self.path_root, self.namespace)) return False os.mkdir(self.path_meta) os.mkdir(self.path_objs) # build config opts conf = util.new_config() # core: unique/permanent info about local machine (often called client) # NOTE. 
not sure if encryption_key should be in core, or unchangable conf.add_section('core') conf.set('core', 'namespace', namespace) conf.set('core', 'clientid', util.gen_uuid()) conf.set('core', 'encryptkey', _get_conf_encryptkey(encrypt_key)) # backend: info about sync service providers # XXX: Error handling conf.add_section('backend') try: services = _get_conf_services(backend) conf.set('backend', 'services', services) conf.set('backend', 'nreplicas', _get_conf_nreplicas(nreplicas, len(services.split(",")))) except: pass # flush with open(self.path_conf, "w") as fd: conf.write(fd) try: self._load() except NameError: shutil.rmtree(self.path_meta) return False # put config into remote conf.remove_option('core', 'clientid') conf.remove_option('core', 'encryptkey') with io.BytesIO() as out: conf.write(out) val = out.getvalue() configname = util.sha1(val) self._put_all_content( val, self.get_remote_path("configs/%s" % configname[:6]), True) #temporary --- move this to pPaxos #self._put_all_content(configname[:6], self.get_remote_path("config"), True) # Format for master: headhash.config[:6].version prev_master = "." + configname[:6] + ".0" # do we need both? or shall we put them into a file together. 
with open(self.get_head(), "w") as f: f.write(prev_master) with open(self.get_prev(), "w") as f: f.write(prev_master) self._put_all_dir(self.get_remote_path("objects")) # change to put_content self._put_all(self.get_head(), self.get_remote_path(self.get_head_name())) self._put_all(self.get_prev(), self.get_remote_path(self.get_prev_name())) from paxos import Proposer self.proposer = Proposer(None, self.services, self.get_pPaxos_path(prev_master)) self._join() return True def get_pPaxos_path(self, path): return self.get_remote_path("pPaxos/" + path) def cmd_gc(self): if not self.check_sanity(): dbg.err("this is not a metasync repo") return False def _find_all_blobs(blob, tracked): # we may need to move this to blobstore if (blob.hv in tracked): return tracked.add(blob.hv) if (blob.thv == "C"): return for name, childblob in blob.entries.iteritems(): _find_all_blobs(childblob, tracked) # check head head = self.get_head_value() tracked = set([]) if (head is not None and len(head) > 0): blob = self.blobstore.get_blob(head, "D") _find_all_blobs(blob, tracked) # check master with open(self.path_master) as f: master_head = f.read().strip() if (len(master_head) > 0): blob = self.blobstore.get_blob(master_head, "D") _find_all_blobs(blob, tracked) allblobs = set(self.blobstore.list()) # remove following blobs_to_remove = allblobs - tracked def __rm(srv, remote_path): dbg.job("submitted to: %s (%s)" % (srv, remote_path)) srv.rm(remote_path) for hashname in blobs_to_remove: for i in self.mapping.get_mapping(hashname): self.scheduler.submit(self.srvmap[i], True, __rm, self.get_remote_obj_path(hashname)) os.unlink(self.get_local_obj_path(hashname)) return True def cmd_rm(self, pn): if not self.check_sanity(): dbg.err("this is not a metasync repo") return False #TODO: check if the file exists beg = time.time() try: dirname = os.path.dirname(pn) dirblob = self.blobstore.load_dir(dirname, False) if (dirblob is None): dbg.err("%s does not exist" % pn) return False except 
NotTrackedException as e: dbg.err(str(e)) return False fname = os.path.basename(pn) if (not fname in dirblob): dbg.err("%s does not exist" % pn) return False dirblob.rm(fname) root = self.get_root_blob() root.store() newblobs = self.blobstore.get_added_blobs() # we may need to include pointer for previous version. util.write_file(self.get_head(), root.hv) self.append_history(root.hv) end = time.time() dbg.time("local write: %f" % (end - beg)) # push new blobs remotely self.bstore_sync(newblobs) self._put_all(self.get_head(), self.get_remote_path(self.get_head_name())) end = time.time() dbg.time("remote write: %f" % (end - beg)) self._join() # drop local copy # TODO: rm only tracked files if removing file. try: os.unlink(pn) except: dbg.err("failed to rm %s" % pn) return False return True def append_history(self, hv): util.append_file(self.path_head_history, hv + "\n") def cmd_checkin(self, paths, unit=BLOB_UNIT, upload_only_first=False): if not self.check_sanity(): dbg.err("this is not a metasync repo") return False if type(paths) != types.ListType: paths = [paths] for pn in paths: if not os.path.exists(pn): dbg.err("File %s doesn't exits." % pn) return False beg = time.time() #XXX: considering mtime, check hash of chunks? 
changed = False for path in paths: if (not os.path.isfile(path)): changed = True for root, dirs, files in os.walk(path): fsizesum = 0 for fname in files: fsizesum += os.stat(os.path.join(root, fname)).st_size print(root + " " + str(fsizesum)) if (fsizesum < unit): dirblob = self.blobstore.load_dir(root, dirty=True, merge=True) for fname in files: dirblob.add_file(fname, os.path.join(root, fname)) dirblob.done_adding() else: dirblob = self.blobstore.load_dir(root, dirty=True) for fname in files: fileblob = self.blobstore.load_file( os.path.join(root, fname), unit) if (fname in dirblob and dirblob[fname].hv == fileblob.hv): continue dirblob.add(fname, fileblob) else: fileblob = self.blobstore.load_file(path, unit) dirname = os.path.dirname(path) if (dirname == ""): dirname = "." dirblob = self.blobstore.load_dir(dirname, dirty=True) fname = os.path.basename(path) if (fname in dirblob and dirblob[fname].hv == fileblob.hv): continue changed = True dirblob.add(fname, fileblob) if (not changed): return True root = self.get_root_blob() root.store() newblobs = self.blobstore.get_added_blobs() util.write_file( self.get_head(), "%s.%s.%d" % (root.hv, self.get_config_hash(), self.get_next_version())) end = time.time() dbg.time("local write: %f" % (end - beg)) # push new blobs remotely leftover = self.bstore_sync(newblobs) self._update_all(self.get_head(), self.get_remote_path(self.get_head_name())) self._join() end = time.time() dbg.time("remote write for R1: %f" % (end - beg)) if (not upload_only_first): self.bstore_sync_left(leftover) end = time.time() dbg.time("remote write for left: %f" % (end - beg)) return [] else: return leftover def cmd_push(self): prev = self.get_prev_value() newvalue = self.get_head_and_config() val = self.propose_value(prev, newvalue) # print("val: ", val, "newval: ", newvalue, "prev: ", prev) if (val != newvalue): dbg.err("You should fetch first") return False # with open(self.path_master) as f: # master_head = f.read().strip() # with 
open(self.get_head()) as f: # head = f.read().strip() # if(len(master_head) > 0): # head_history = self.get_history() # if(not master_head in head_history): # dbg.err("You should update first") # self.unlock_master() # return False # check master is ancestor of the head shutil.copyfile(self.get_head(), self.get_prev()) self._update_all(self.get_prev(), self.get_remote_path(self.get_prev_name())) from paxos import Proposer self.proposer = Proposer(None, self.services, self.get_pPaxos_path(newvalue)) self._join() return True def cmd_status(self, unit=BLOB_UNIT): def simple_walk(folder): # simple_walk will skip dipping into the folder # that are not tracked in the repo untracked = [] changed = [] for f in os.listdir(folder): if f == META_DIR: continue basename = os.path.basename(folder) if basename == '.' or basename == '': relpath = f else: relpath = os.path.join(folder, f) if relpath in tracked: if os.path.isdir(f): _untracked, _changed = simple_walk(relpath) untracked.extend(_untracked) changed.extend(_changed) else: fblob = tracked[relpath] # compare the file modified time and its metadata blob modified time curr_mtime = os.path.getmtime(relpath) last_mtime = os.path.getmtime( os.path.join(self.path_objs, fblob.hv)) if curr_mtime > last_mtime: # only load file when the file modified time is greater than metadata modified time fblob._load() flag = False # compare chunk hash for (offset, chunk) in util.each_chunk2(relpath, unit): if util.sha1( chunk) != fblob.entries[offset].hv: flag = True break if flag: changed.append(relpath) else: if os.path.isdir(relpath): relpath = os.path.join(relpath, '') untracked.append(relpath) return untracked, changed if not self.check_sanity(): dbg.err("this is not a metasync repo") return False # switch to metasync repo root folder os.chdir(self.path_root) # compare the head and master history head_history = self.get_history() master_history = self.get_history(True) head_diverge = 0 for head in head_history: if (head in 
master_history): break head_diverge += 1 if head_diverge == len(head_history): master_diverge = len(master_history) else: master_diverge = master_history.index(head_history[head_diverge]) if head_diverge == 0 and master_diverge == 0: print "\nYour branch is up-to-date with master." elif head_diverge == 0: print "\nYour branch is behind master by %d commit(s)." % master_diverge elif master_diverge == 0: print "\nYour branch is ahead of master by %d commit(s)." % head_diverge else: print "\nYour branch and master have diverged," print "and have %d and %d different commits each, respectively" % ( head_diverge, master_diverge) root = self.get_root_blob() tracked = {} for (path, blob) in root.walk(): tracked[path] = blob untracked, changed = simple_walk('.') if changed: print("\nChanges not checked in:") for f in changed: print("\033[31m\tmodified: %s\033[m" % f) if untracked: print("\nUntracked files:") for f in untracked: print("\033[31m\t%s\033[m" % f) return True
class Server:
    """One node of a Paxos-replicated blockchain.

    The server accepts pickled ``dict`` messages over TCP.  Client
    ``TRANSFER`` messages are queued in ``self.set``; once two are queued
    they are mined into a block and proposed through ``self.proposer``.
    Decided blocks are appended to ``self.blockchain`` and pickled to
    ``<name>_blockchain.txt``.
    """

    def __init__(self, config, globalConfig, client_sock=None):
        """Store the configuration and create the Paxos roles.

        Args:
            config: this server's settings ("name", "ip-addr", "port").
            globalConfig: settings of every server, indexed by name.
            client_sock: optional socket used for TRANSFER-ACK replies; it
                is overwritten whenever a TRANSFER message arrives.
        """
        self.config = config
        self.globalConfig = globalConfig
        self.init_balance = 100
        self.set = []
        self.blockchain = []
        self.proposer = Proposer(self.config, globalConfig)
        self.acceptor = Acceptor(self.config)
        self.inPaxos = False
        # Keep the attribute defined even before the first TRANSFER arrives.
        self.client_sock = client_sock

    def run(self):
        """Bind the listening socket, restore state and serve forever."""
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        sock.bind((self.config["ip-addr"], self.config["port"]))
        sock.listen()
        try:
            with open(self.config["name"] + "_blockchain.txt", "rb") as f:
                self.blockchain = pickle.load(f)
        except Exception:
            # Best effort: start with an empty chain, but do not swallow
            # KeyboardInterrupt / SystemExit like a bare except would.
            print("File doesn't exist.")
        self.askResync()
        print("Server is listening...")
        while True:
            conn, addr = sock.accept()
            t1 = threading.Thread(target=Server.handleReq, args=(self, conn))
            t1.start()

    def handleReq(self, conn):
        """Read messages from `conn` and dispatch each on its own thread.

        Returns when the peer closes the connection.
        """
        while True:
            msg = conn.recv(1024)
            if not msg:
                # recv() returns b'' once the peer has closed; looping on
                # would spin forever.  The socket is not closed explicitly
                # because handler threads / self.client_sock may still
                # reference it.
                return
            try:
                # NOTE(review): pickle.loads on data read from the network
                # can execute arbitrary code -- only safe between trusted
                # peers.
                decodedMsg = pickle.loads(msg)
            except EOFError:
                print("pickle EOF")
                continue
            except pickle.UnpicklingError:
                # truncated / corrupt payloads surface as UnpicklingError
                print("pickle data truncated or corrupt")
                continue
            t1 = threading.Thread(target=Server.handleClientMsg,
                                  args=(self, decodedMsg, conn))
            t1.start()

    def handleClientMsg(self, decodedMsg, conn):
        """Dispatch one decoded message according to its "msg" field.

        Anything that is not a client command or a resync message is
        forwarded to `handlePaxos`.
        """
        #print("thread msg recvd", decodedMsg)
        msgType = decodedMsg["msg"]
        if msgType == "TRANSFER":
            # add to set
            self.client_sock = conn
            self.set.append(decodedMsg)
            if len(self.set) >= 2:
                self.handlePaxos(decodedMsg)
        elif msgType == "PRINTBLOCKCHAIN":
            self.printBlockchain(decodedMsg, conn)
        elif msgType == "PRINTBALANCE":
            self.printBalance(decodedMsg, conn)
        elif msgType == "PRINTSET":
            self.printSet(decodedMsg, conn)
        elif msgType == "CRASH":
            msg = {}
            msg["msg"] = "CRASH-ACK"
            encMsg = pickle.dumps(msg)
            conn.sendall(encMsg)
            #conn.shutdown(socket.SHUT_RDWR)
            print("Emulating server crash.")
            #conn.close()
            os._exit(1)
        elif msgType == "RESYNC":
            self._replyResync(decodedMsg)
        elif msgType == "RESYNC-ACK":
            #print("Received blockchain from ", decodedMsg["src-name"])
            partialBlockchain = decodedMsg["blockchain"]
            with lock:
                # only take blocks that extend the local chain by one
                for b in partialBlockchain:
                    if b.depth == len(self.blockchain):
                        self.blockchain.append(b)
            if len(self.set) >= 2:
                x = {}
                x["msg"] = "RETRY"
                thp = threading.Thread(target=Server.handlePaxos,
                                       args=(self, x))
                thp.start()
        else:
            self.handlePaxos(decodedMsg)

    def _replyResync(self, decodedMsg):
        """Send the blocks after the requester's depth back to it."""
        requester = decodedMsg["src-name"]
        print("Received RESYNC request from ", requester)
        startIndex = decodedMsg["cur-depth"]
        partialBlockchain = []
        if startIndex is None:
            startIndex = 0
        else:
            startIndex = startIndex + 1
        for x in range(startIndex, len(self.blockchain)):
            b = self.blockchain[x]
            copiedB = Block(b.tx1, b.tx2, b.prevHash, b.depth, b.nonce)
            partialBlockchain.append(copiedB)
        encMsg = pickle.dumps(createResyncAck(self, partialBlockchain))
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            s.connect((self.globalConfig[requester]["ip-addr"],
                       self.globalConfig[requester]["port"]))
            time.sleep(randDelay())
            s.sendall(encMsg)
        except socket.error as sock_err:
            if sock_err.errno == socket.errno.ECONNREFUSED:
                # the original referenced an undefined name here
                print("Server " + requester + " unreachable.")
        finally:
            s.close()

    def handlePaxos(self, decodedMsg):
        """Advance the Paxos state machine for one message.

        TRANSFER / RETRY start a new proposal when two transactions are
        queued and no round is running; PREPARE, PREP-ACK, ACCEPT,
        ACCEPT-ACK and DECISION are routed to the proposer / acceptor when
        their ballot depth matches the local chain length.
        """
        msgType = decodedMsg["msg"]
        with lock:
            if msgType in ("TRANSFER", "RETRY") and not self.inPaxos:
                if len(self.set) >= 2:
                    self.inPaxos = True
                    if not self.checkProposeReady():
                        # nothing could be proposed: answer the client and
                        # leave the round
                        tempMsg = createServerRes(
                            self.config, decodedMsg,
                            msgFormatTrans(self.set[0]) + " and " +
                            msgFormatTrans(self.set[1]) +
                            " have been added to the blockchain.",
                            "TRANSFER-ACK")
                        encMsg = pickle.dumps(tempMsg)
                        self.client_sock.sendall(encMsg)
                        self.inPaxos = False

        if msgType == "PREPARE":
            ballot = decodedMsg["bal-num"]
            # Check for None before touching .depth, and ask for a resync
            # only once when the ballot is ahead of the local chain.
            if ballot is not None and ballot.depth > len(self.blockchain):
                self.askResync()
            elif ballot is not None and ballot.depth == len(self.blockchain):
                if (decodedMsg["src-name"] != self.config["name"]
                        and self.proposer.balNum is not None
                        and ballot.seqNum > self.proposer.curSeqNum):
                    self.proposer.curSeqNum = ballot.seqNum
                self.acceptor.recvPrepare(decodedMsg)
                if self.inPaxos:
                    # wait for the round to finish; retry if the chain did
                    # not grow in the meantime
                    prevLen = len(self.blockchain)
                    time.sleep(15)
                    with lock:
                        if prevLen == len(self.blockchain) and self.inPaxos:
                            self.inPaxos = False
                            print("Timed-out. Retrying. Prev len was ",
                                  prevLen, " but len blockchain now ",
                                  len(self.blockchain))
                            self.proposer.curSeqNum += 1
                            x = {}
                            x["msg"] = "RETRY"
                            thp = threading.Thread(target=Server.handlePaxos,
                                                   args=(self, x))
                            thp.start()
        elif msgType == "PREP-ACK":
            acceptNum = decodedMsg["accept-num"]
            if acceptNum is None or acceptNum.depth == len(self.blockchain):
                self.proposer.handlePrepAck(decodedMsg)
        elif msgType == "ACCEPT":
            ballot = decodedMsg["bal-num"]
            if ballot is not None and ballot.depth == len(self.blockchain):
                if (decodedMsg["src-name"] != self.config["name"]
                        and self.proposer.balNum is not None
                        and ballot >= self.proposer.balNum):
                    self.proposer.curSeqNum = ballot.seqNum
                self.acceptor.recvAccept(decodedMsg)
        elif msgType == "ACCEPT-ACK":
            acceptNum = decodedMsg["accept-num"]
            if acceptNum is None or acceptNum.depth == len(self.blockchain):
                self.proposer.handleAcceptAck(decodedMsg)
        elif msgType == "DECISION":
            ballot = decodedMsg["bal-num"]
            if ballot is not None and ballot.depth == len(self.blockchain):
                self.proposer.curSeqNum = 0
                self.handleDecision(decodedMsg)
                x = {}
                x["msg"] = "RETRY"
                self.handlePaxos(x)
            elif ballot and (
                    (len(self.blockchain) == 0 and ballot.depth > 0)
                    or (len(self.blockchain) < ballot.depth)):
                self.askResync()

    def askResync(self):
        """Broadcast a RESYNC request to the other servers."""
        print("Sending RESYNC request to all servers.")
        encMsg = pickle.dumps(createResyncRequest(self))
        self.broadcast(encMsg)

    def broadcast(self, m):
        """Send `m` to every server in this node's partition and wait."""
        threads = []
        for s in servers:
            if s in PARTITION[self.config["name"]]:
                t = threading.Thread(
                    target=Server.randDelayMsg,
                    args=(self, m, self.globalConfig[s]),
                )
                threads.append(t)
            else:
                print("NW partition - cannot speak to ", s)
        for t in threads:
            t.start()
        for t in threads:
            t.join()

    def randDelayMsg(self, m, proc):
        """Connect to `proc`, sleep `randDelay()` seconds, then send `m`.

        Socket errors are ignored (best-effort delivery); the socket is
        always closed.
        """
        b = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            b.connect((proc["ip-addr"], proc["port"]))
            time.sleep(randDelay())
            b.sendall(m)
        except socket.error:
            # unreachable peers are silently skipped
            pass
        finally:
            b.close()

    def checkProposeReady(self):
        """Start a ballot if a block can be mined; return whether it was."""
        block = self.transactionCheck()
        if block is not None:
            self.createBallotThread(block)
            return True
        return False

    def transactionCheck(self):
        """Mine a block from the first two queued transactions.

        Returns the mined block, or None when fewer than two transactions
        are queued or they are not valid (in which case both are moved to
        the back of the queue).
        """
        val = None
        if len(self.set) >= 2:
            t1 = msgFormatTrans(self.set[0])
            t2 = msgFormatTrans(self.set[1])
            if self.validateTrans(self.set[0], self.set[1]):
                val = self.mineBlock(t1, t2)
            else:
                print("Previous 2 transactions not valid.")
                # rotate the two rejected transactions to the back
                self.set.append(self.set.pop(0))
                self.set.append(self.set.pop(0))
        return val

    def calcBalance(self):
        """Return a dict with the balances of accounts 'A'..'E'.

        Every account starts at `self.init_balance`; both transactions of
        every block in the chain are then applied.
        """
        balance = {
            'A': self.init_balance,
            'B': self.init_balance,
            'C': self.init_balance,
            'D': self.init_balance,
            'E': self.init_balance
        }
        for b in self.blockchain:
            t1 = transFormatDict(b.tx1)
            t2 = transFormatDict(b.tx2)
            balance[t1["sender"]] = balance[t1["sender"]] - int(t1["amount"])
            balance[t2["sender"]] = balance[t2["sender"]] - int(t2["amount"])
            balance[t1["receiver"]] = balance[t1["receiver"]] + int(
                t1["amount"])
            balance[t2["receiver"]] = balance[t2["receiver"]] + int(
                t2["amount"])
        return balance

    def validateTrans(self, t1, t2):
        """Return True if every sender can afford what it sends.

        The amounts are summed per sender, so two transfers from the same
        account are checked together while transfers from different
        accounts are each checked against their own balance.
        """
        balance = self.calcBalance()
        spending = {}
        for t in (t1, t2):
            spending[t["sender"]] = spending.get(t["sender"], 0) + t["amount"]
        return all(balance[sender] - amount >= 0
                   for sender, amount in spending.items())

    def calcPrevHash(self, b):
        """Return the SHA-256 hex digest of block `b`'s tx1 + tx2 + nonce."""
        s = str(b.tx1 + b.tx2 + b.nonce)
        return hashlib.sha256(s.encode()).hexdigest()

    def mineBlock(self, t1, t2):
        """Create and mine a block for `t1`, `t2` on top of the chain."""
        prevHash = None
        depth = 0
        if len(self.blockchain) > 0:
            prevHash = self.calcPrevHash(self.blockchain[-1])
            depth = len(self.blockchain)
        b = Block(t1, t2, prevHash, depth)
        b.mine()
        return b

    def createBallotThread(self, block):
        """Ask the proposer to create a ballot for `block` at this depth."""
        self.proposer.createBallot(block, len(self.blockchain))

    def handleDecision(self, dMsg):
        """Commit the decided block if it extends the local chain.

        When the block holds the two transactions at the head of the queue
        the client is acknowledged and they are removed.  The chain is
        saved to disk after every commit.
        """
        self.acceptor.recvDecision(dMsg)
        if dMsg["bal-num"].depth == len(self.blockchain):
            self.inPaxos = False
            if (len(self.set) >= 2
                    and dMsg["val"].tx1 == msgFormatTrans(self.set[0])
                    and dMsg["val"].tx2 == msgFormatTrans(self.set[1])):
                tempMsg = createServerRes(
                    self.config, dMsg,
                    msgFormatTrans(self.set[0]) + " and " +
                    msgFormatTrans(self.set[1]) +
                    " have been committed to the blockchain.", "TRANSFER-ACK")
                encMsg = pickle.dumps(tempMsg)
                self.client_sock.sendall(encMsg)
                # pop first 2 items because committed successfully
                self.set.pop(0)
                self.set.pop(0)
                print("Popped transactions from set.")
            self.blockchain.append(dMsg["val"])
            with open(self.config["name"] + "_blockchain.txt", "wb") as f:
                print("Saving current blockchain to disk.")
                pickle.dump(self.blockchain, f)
            print("New blockchain length: ", len(self.blockchain))
            print("Commiting block to blockchain. Block: \n", dMsg["val"])
            if len(self.set) >= 2:
                x = {}
                x["msg"] = "RETRY"
                self.handlePaxos(x)
        else:
            print(
                "Not commiting block. Block Depth < Current Blockchain Depth")

    def printBlockchain(self, dMsg, conn):
        """Send the whole blockchain to `conn` as a BLOCKCHAIN-ACK."""
        msg = createServerRes(self.config, dMsg, self.blockchain,
                              "BLOCKCHAIN-ACK")
        conn.sendall(pickle.dumps(msg))

    def printBalance(self, dMsg, conn):
        """Send the current balances to `conn` as a BALANCE-ACK."""
        balance = self.calcBalance()
        msg = createServerRes(self.config, dMsg, balance, "BALANCE-ACK")
        conn.sendall(pickle.dumps(msg))

    def printSet(self, dMsg, conn):
        """Send the formatted pending transactions to `conn` as a SET-ACK."""
        #print("printing set")
        setList = [msgFormatTrans(tran) for tran in self.set]
        msg = createServerRes(self.config, dMsg, setList, "SET-ACK")
        conn.sendall(pickle.dumps(msg))