def cmd_share(self, target_email):
    if not self.check_sanity():
        dbg.err("this is not a metasync repo")
        return False
    for srv in self.services:
        srv.share(self.namespace, target_email)

def cmd_push(self):
    prev = self.get_prev_value()
    newvalue = self.get_head_and_config()
    val = self.propose_value(prev, newvalue)
    # print("val: ", val, "newval: ", newvalue, "prev: ", prev)
    if (val != newvalue):
        dbg.err("You should fetch first")
        return False

    # with open(self.path_master) as f:
    #     master_head = f.read().strip()
    # with open(self.get_head()) as f:
    #     head = f.read().strip()
    # if(len(master_head) > 0):
    #     head_history = self.get_history()
    #     if(not master_head in head_history):
    #         dbg.err("You should update first")
    #         self.unlock_master()
    #         return False
    # check master is ancestor of the head

    shutil.copyfile(self.get_head(), self.get_prev())
    self._update_all(self.get_prev(), self.get_remote_path(self.get_prev_name()))

    from paxos import Proposer
    self.proposer = Proposer(None, self.services, self.get_pPaxos_path(newvalue))
    self._join()

    return True

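# Hypothetical sketch, not part of the original code: when cmd_push() loses
# the proposal ("You should fetch first"), a caller could refresh its view and
# retry, the same way the daemon loop in start() pairs fetch/update with push.
# The helper name is made up for illustration.
def _push_with_refresh(ms):
    if ms.cmd_push():
        return True
    ms.cmd_fetch()     # pull the winning master
    ms.cmd_update()    # apply it to the working tree
    return ms.cmd_push()
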
def cmd_test(metasync, args, opts):
    "quick test (e.g., metasync test {%s})"

    # invoke pdb when failed in testing
    util.install_pdb()

    tmpdir = tempfile.mkdtemp()
    root = os.path.join(tmpdir, "repo")
    util.mkdirs(root)
    metasync = MetaSync(root)

    # opts for sub test routines
    opts.root = root
    opts.tmpdir = tmpdir
    opts.encrypt_key = "testkey" if opts.encrypt else ""

    dbg.info("root: %s" % root)
    dbg.info("args: %s" % args)
    dbg.info("opts: ")
    for (k, v) in vars(opts).iteritems():
        dbg.info(" %-12s = %s" % (k, v))

    alltests = dict(test.get_all_tests())
    if any(case not in alltests for case in args):
        dbg.err("no such test case: %s" % args)
        alltests["help"](metasync, opts)
        exit(1)

    # print help if no test case is provided
    if len(args) == 0:
        args = ["help"]

    for case in args:
        dbg.info("#R<testing %s#> (%s)" % (case, alltests[case].__doc__))
        alltests[case](metasync, opts)

    # poorman's tree
    def tree(path):
        for root, dirs, files in os.walk(path):
            base = os.path.basename(root)
            idnt = ' ' * (root.replace(path, '').count(os.sep))
            print('%s%s/' % (idnt, base))
            for f in files:
                pn = os.path.join(root, f)
                print(' %s%s [%s]' % (idnt, f, os.stat(pn).st_size))
                # dump some content of blobs
                if opts.dump and "objects" == base:
                    print(util.hexdump(util.read_file(pn, 32*2)))
                    print

    # dump root
    if not opts.notree:
        tree(tmpdir)

    # cleanup tmpdir
    if not opts.keep:
        shutil.rmtree(tmpdir)

def cmd_rm(metasync, args, opts):
    "remove a file (e.g., metasync remove [file])"

    if len(args) == 0:
        dbg.err("Need a file to remove")
        return -1

    for f in args:
        metasync.cmd_rm(f)

def cmd_mv(metasync, args, opts):
    "move file (e.g., metasync mv [src] [dst])"

    if len(args) != 2:
        dbg.err("not enough arguments. e.g., metasync mv [src] [dst]")
        return -1

    if not metasync.cmd_mv(args[0], args[1]):
        return -1

def cmd_share(metasync, args, opts):
    "share the repo with someone"

    if (len(args) < 1):
        dbg.err("Give an email to share your repo.")
        return -1

    target_email = args[0]
    metasync.cmd_share(target_email)

def cmd_init(metasync, args, opts):
    "initialize the repo (e.g., metasync init [namespace])"

    # namespace to avoid conflict
    ns = args[0] if len(args) > 0 else str(util.gen_uuid())

    if not metasync.cmd_init(ns):
        dbg.err("Can't initialize the repository")
        return -1

def cmd_fetch(self):
    if not self.check_sanity():
        dbg.err("it's not a metasync repo.")
        return False

    # TODO: change it into comparing between masters
    self.bstore_download()
    self._join()

    return True

def cmd_checkin(metasync, args, opts):
    "commit a file (e.g., metasync checkin [file])"

    if len(args) == 0:
        dbg.err("Need a file to checkin")
        return -1

    target = []
    for f in args:
        target.extend(glob.glob(f))
    metasync.cmd_checkin(target)

def cmd_clone(metasync, args, opts):
    "clone the repo (e.g., metasync clone [namespace])"

    if (len(args) < 1):
        dbg.err("It requires a namespace")
        return -1

    ns = args[0]
    if not metasync.cmd_clone(ns):
        dbg.err("Can't clone the repository")
        return -1

def cmd_rm(self, pn):
    if not self.check_sanity():
        dbg.err("this is not a metasync repo")
        return False
    # TODO: check if the file exists

    beg = time.time()

    try:
        dirname = os.path.dirname(pn)
        dirblob = self.blobstore.load_dir(dirname, False)
        if (dirblob is None):
            dbg.err("%s does not exist" % pn)
            return False
    except NotTrackedException as e:
        dbg.err(str(e))
        return False

    fname = os.path.basename(pn)
    if (not fname in dirblob):
        dbg.err("%s does not exist" % pn)
        return False

    dirblob.rm(fname)
    root = self.get_root_blob()
    root.store()

    newblobs = self.blobstore.get_added_blobs()

    # we may need to include pointer for previous version.
    util.write_file(self.get_head(), root.hv)
    self.append_history(root.hv)

    end = time.time()
    dbg.time("local write: %f" % (end - beg))

    # push new blobs remotely
    self.bstore_sync(newblobs)
    self._put_all(self.get_head(), self.get_remote_path(self.get_head_name()))

    end = time.time()
    dbg.time("remote write: %f" % (end - beg))

    self._join()

    # drop local copy
    # TODO: rm only tracked files if removing file.
    try:
        os.unlink(pn)
    except:
        dbg.err("failed to rm %s" % pn)
        return False

    return True

def cmd_mv(self, src_pn, dst_pn):
    if not self.check_sanity():
        dbg.err("it's not a metasync repo.")
        return False

    src_pn = os.path.abspath(src_pn)
    dst_pn = os.path.abspath(dst_pn)
    # TODO: check src_pn exists

    beg = time.time()

    try:
        dirname = os.path.dirname(src_pn)
        dirblob = self.blobstore.load_dir(dirname, False, dirty=True)
        if (dirblob is None):
            dbg.err("%s does not exist" % src_pn)
            return False
    except NotTrackedException as e:
        dbg.err(str(e))
        return False

    fname = os.path.basename(src_pn)
    if (not fname in dirblob):
        dbg.err("%s does not exist" % src_pn)
        return False

    fblob = dirblob[fname]
    dirblob.rm(fname)

    dst_dirname = os.path.dirname(dst_pn)
    if (dirname != dst_dirname):
        # moving across directories: load (or create) the destination directory blob
        dirblob = self.blobstore.load_dir(dst_dirname, True, dirty=True)
        assert dirblob is not None

    dst_fname = os.path.basename(dst_pn)
    dirblob.add(dst_fname, fblob, dirty=False)

    root = self.get_root_blob()
    root.store()
    newblobs = self.blobstore.get_added_blobs()

    util.write_file(self.get_head(), root.hv)
    self.append_history(root.hv)

    end = time.time()
    dbg.time("local write: %f" % (end - beg))

    # push new blobs remotely
    self.bstore_sync(newblobs)
    self._put_all(self.get_head(), self.get_remote_path(self.get_head_name()))

    end = time.time()
    dbg.time("remote write: %f" % (end - beg))

    # move the file
    shutil.move(src_pn, dst_pn)

    self._join()

    return True

def factory(srv):
    srv = srv.strip()
    if srv.startswith("disk@"):
        root = srv.split("@")[1]
        return DiskAPI(root)
    elif srv == "google":
        return GoogleAPI()
    elif srv == "box":
        return BoxAPI()
    elif srv == "dropbox":
        return DropboxAPI()
    elif srv == "baidu":
        return BaiduAPI()
    elif srv == "onedrive":
        return OneDriveAPI()

    dbg.err("No such provider: %s" % srv)
    raise NameError(srv)

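# Hypothetical helper, not part of the original module: shows how the
# comma-separated "services" value kept in the backend config (e.g.
# "dropbox,google,disk@/tmp") could be mapped onto provider objects via
# factory(). The name load_backends is made up for this sketch.
def load_backends(services_line):
    return [factory(s) for s in services_line.split(",") if s.strip()]

# e.g. load_backends("dropbox,disk@/tmp") -> [DropboxAPI(...), DiskAPI("/tmp")]
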
def start(metasync, args, opts):
    if not metasync.check_sanity():
        dbg.err("Not a metasync directory")
        exit(1)

    daemon = MetaSyncDaemon(metasync)

    # invoke observer
    if (not opts.nocheckin):
        observer = Observer()
        observer.schedule(daemon, metasync.path_root, recursive=True)
        observer.start()

    # stupid poll - change to event-driven.
    try:
        while True:
            time.sleep(SYNC_WAIT)
            if (time.time() - daemon.lastupdate < 0.5):
                continue

            # batching -> TODO. commit
            files, dirs = daemon.get_all_files()
            files = list(files)
            # collapse files under a changed directory into the directory itself
            for d in dirs:
                files = filter(lambda x: not x.startswith(d), files)
            for d in dirs:
                files.append(d)

            # TODO: can we do it together?
            if (len(files) > 0):
                leftover = metasync.cmd_checkin(files, upload_only_first=True)
                metasync.cmd_push()
                if (isinstance(leftover, dict) and len(leftover) > 0):
                    metasync.bstore_sync_left(leftover)
                # dbg.info("%s" % files)

            # STUPID pull --- check fetch
            metasync.cmd_fetch()
            metasync.cmd_update()
    except KeyboardInterrupt:
        observer.stop()
        observer.join()

def _get_conf_nreplicas(default, nservices):
    assert type(default) in [types.NoneType, int]

    if default is not None:
        return str(default)

    # validate (possibly dirty) user input
    print "input the number of replicas (default=2)"
    while True:
        replicas = raw_input("> ").strip()
        if replicas == "":
            replicas = "2"
        if replicas.isdigit():
            if int(replicas) > nservices:
                dbg.err("the number of replicas should not be larger than the number of services")
            else:
                return replicas
        else:
            print "input the number"

def cmd_gc(self):
    if not self.check_sanity():
        dbg.err("this is not a metasync repo")
        return False

    def _find_all_blobs(blob, tracked):
        # we may need to move this to blobstore
        if (blob.hv in tracked):
            return
        tracked.add(blob.hv)
        if (blob.thv == "C"):
            return
        for name, childblob in blob.entries.iteritems():
            _find_all_blobs(childblob, tracked)

    # check head
    head = self.get_head_value()
    tracked = set([])
    if (head is not None and len(head) > 0):
        blob = self.blobstore.get_blob(head, "D")
        _find_all_blobs(blob, tracked)

    # check master
    with open(self.path_master) as f:
        master_head = f.read().strip()
    if (len(master_head) > 0):
        blob = self.blobstore.get_blob(master_head, "D")
        _find_all_blobs(blob, tracked)

    allblobs = set(self.blobstore.list())

    # remove following
    blobs_to_remove = allblobs - tracked

    def __rm(srv, remote_path):
        dbg.job("submitted to: %s (%s)" % (srv, remote_path))
        srv.rm(remote_path)

    for hashname in blobs_to_remove:
        for i in self.mapping.get_mapping(hashname):
            self.scheduler.submit(self.srvmap[i], True, __rm,
                                  self.get_remote_obj_path(hashname))
        os.unlink(self.get_local_obj_path(hashname))

    return True

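# Toy illustration (hash values are made up) of the mark-and-sweep above:
# every blob reachable from head or master ends up in `tracked`, and the
# set difference is exactly what cmd_gc() deletes locally and remotely.
allblobs = set(["h1", "h2", "h3", "h4"])
tracked = set(["h1", "h3"])
assert allblobs - tracked == set(["h2", "h4"])   # blobs_to_remove
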
def _authorize():
    import getpass, time
    from selenium import webdriver
    from selenium.webdriver.common.keys import Keys
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.common.by import By

    params = {
        'response_type': 'code',
        'client_id': CLIENT_ID,
        'redirect_uri': 'oob',
        'scope': 'netdisk'
    }
    url = OAuth2.AUTH_URL + '?' + urllib.urlencode(params)
    #print 'Open auth url:', url

    browser = webdriver.PhantomJS(
        service_log_path=os.path.join(tempfile.gettempdir(), 'ghostdriver.log'),
        service_args=['--ignore-ssl-errors=true', '--ssl-protocol=tlsv1'])
    browser.get(url)

    try:
        wait = WebDriverWait(browser, 30)
        username = wait.until(EC.presence_of_element_located((By.NAME, "userName")))
        username.send_keys(raw_input("Enter your baidu userid:"))
        pwd = browser.find_element_by_name("password")
        pwd.send_keys(getpass.getpass("Enter your baidu password:"))
        # submit the login form
        btn = browser.find_element_by_id("TANGRAM__3__submit")
        btn.click()
        wait = WebDriverWait(browser, 30)
        verify = wait.until(EC.presence_of_element_located((By.ID, "Verifier")))
        code = verify.get_attribute('value')
        if not code:
            raise Exception('User denied authorization')
    except:
        browser.quit()
        import dbg
        dbg.err("error in processing")
        print 'open auth url: ', url
        webbrowser.open(url)
        code = raw_input("Copy the authorization code: ").strip()

    return code

def _get_conf_encryptkey(default):
    assert type(default) in [types.NoneType, str]

    if default is not None:
        return default

    # NOTE: an empty encrypt_key means no encryption
    encrypt_key = ""

    print "do you use encryption (y/n)?"
    while True:
        encrypt_yn = raw_input("> ").strip().lower()
        if (encrypt_yn not in ['y', 'n']):
            dbg.err("input with y/n")
            continue
        break

    if (encrypt_yn == 'y'):
        print "input keyphrase:"
        encrypt_key = raw_input("> ").strip()

    return encrypt_key

def _authorize():
    import getpass, time
    from selenium import webdriver
    from selenium.webdriver.common.keys import Keys
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.common.by import By
    from selenium.webdriver.chrome.options import Options

    params = {
        'response_type': 'code',
        'client_id': CLIENT_ID,
        'redirect_uri': 'oob',
        'scope': 'netdisk'
    }
    url = OAuth2.AUTH_URL + '?' + urllib.urlencode(params)
    #print 'Open auth url:', url

    opts = Options()
    # Set chrome binary if needed
    #opts.binary_location = '/usr/bin/chromium-browser'
    browser = webdriver.Chrome(chrome_options=opts)
    browser.get(url)

    try:
        wait = WebDriverWait(browser, 60)
        verify = wait.until(EC.presence_of_element_located((By.ID, "Verifier")))
        code = verify.get_attribute('value')
        if not code:
            raise Exception('User denied authorization')
    except:
        browser.quit()
        import dbg
        dbg.err("error in processing")
        print 'open auth url: ', url
        webbrowser.open(url)
        code = raw_input("Copy the authorization code: ").strip()

    return code

def try_merge(self, head_history, master_history):
    # this needs to be fixed.
    dbg.dbg("Trying to merge")
    # we may need to cache the last branched point
    common = self.get_common_ancestor(head_history, master_history)
    dbg.dbg("%s %s %s", head_history[0], master_history[0], common)
    common = self.blobstore.get_blob(common, "D")
    head = self.get_root_blob()
    master = self.blobstore.get_blob(master_history[0], "D")

    added1 = head.diff(common)
    added2 = master.diff(common)

    def intersect(a, b):
        return list(set(a) & set(b))

    if (len(intersect(added1.keys(), added2.keys())) != 0):
        dbg.err("both modified--we need to handle it")
        return False

    for i in added2.keys():
        path = os.path.join(self.path_root, i)
        dirblob = self.blobstore.load_dir(os.path.dirname(path), dirty=True)
        dirblob.add(os.path.basename(path), added2[i], dirty=False)

    # HACK, need to go through all the non-overlapped history.
    self.append_history(master.hv)
    head.store()
    self.append_history(head.hv)

    # HACK, need to be changed
    newblobs = self.blobstore.get_added_blobs()

    # push new blobs remotely
    self.bstore_sync(newblobs)
    self._join()

    return True

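# Toy illustration (paths are made up) of the conflict test in try_merge():
# the merge only proceeds when the key sets of the two diffs are disjoint,
# i.e. head and master touched different paths since the common ancestor.
added_by_head = {"/docs/a.txt": "<blob>"}
added_by_master = {"/notes/b.txt": "<blob>"}
assert len(set(added_by_head) & set(added_by_master)) == 0   # mergeable
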
def cmd_diff(self):
    # works only for a 1-level directory
    # need to add diff for file
    if not self.check_sanity():
        dbg.err("this is not a metasync repo")
        return False

    root = self.get_root_blob()
    added = []
    removed = []
    files = os.listdir(".")
    for f in files:
        if (f == ".metasync"):
            continue
        if ("/" + f not in root.files):
            added.append(f)
    for f in root.files:
        if (f[1:] not in files):
            removed.append(f[1:])
    for f in added:
        print("+++ %s" % f)
    for f in removed:
        print("--- %s" % f)

def cmd_checkin(self, paths, unit=BLOB_UNIT, upload_only_first=False):
    if not self.check_sanity():
        dbg.err("this is not a metasync repo")
        return False
    if type(paths) != types.ListType:
        paths = [paths]
    for pn in paths:
        if not os.path.exists(pn):
            dbg.err("File %s doesn't exist." % pn)
            return False

    beg = time.time()

    # XXX: considering mtime, check hash of chunks?
    changed = False
    for path in paths:
        if (not os.path.isfile(path)):
            changed = True
            for root, dirs, files in os.walk(path):
                fsizesum = 0
                for fname in files:
                    fsizesum += os.stat(os.path.join(root, fname)).st_size
                print(root + " " + str(fsizesum))
                if (fsizesum < unit):
                    dirblob = self.blobstore.load_dir(root, dirty=True, merge=True)
                    for fname in files:
                        dirblob.add_file(fname, os.path.join(root, fname))
                    dirblob.done_adding()
                else:
                    dirblob = self.blobstore.load_dir(root, dirty=True)
                    for fname in files:
                        fileblob = self.blobstore.load_file(os.path.join(root, fname), unit)
                        if (fname in dirblob and dirblob[fname].hv == fileblob.hv):
                            continue
                        dirblob.add(fname, fileblob)
        else:
            fileblob = self.blobstore.load_file(path, unit)
            dirname = os.path.dirname(path)
            if (dirname == ""):
                dirname = "."
            dirblob = self.blobstore.load_dir(dirname, dirty=True)
            fname = os.path.basename(path)
            if (fname in dirblob and dirblob[fname].hv == fileblob.hv):
                continue
            changed = True
            dirblob.add(fname, fileblob)

    if (not changed):
        return True

    root = self.get_root_blob()
    root.store()
    newblobs = self.blobstore.get_added_blobs()
    util.write_file(self.get_head(),
                    "%s.%s.%d" % (root.hv, self.get_config_hash(), self.get_next_version()))

    end = time.time()
    dbg.time("local write: %f" % (end - beg))

    # push new blobs remotely
    leftover = self.bstore_sync(newblobs)
    self._update_all(self.get_head(), self.get_remote_path(self.get_head_name()))
    self._join()

    end = time.time()
    dbg.time("remote write for R1: %f" % (end - beg))

    if (not upload_only_first):
        self.bstore_sync_left(leftover)
        end = time.time()
        dbg.time("remote write for left: %f" % (end - beg))
        return []
    else:
        return leftover

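# Hypothetical usage sketch, not part of the original code: a manual
# checkin/push cycle over an already-initialized repo, mirroring what the
# daemon loop in start() does automatically (the path argument is made up).
def _example_checkin_and_push(ms, path="notes.txt"):
    leftover = ms.cmd_checkin([path], upload_only_first=True)   # upload the first replica
    ms.cmd_push()                                               # propose the new head
    if isinstance(leftover, dict) and len(leftover) > 0:
        ms.bstore_sync_left(leftover)                           # finish remaining replicas
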
def cmd_status(self, unit=BLOB_UNIT):

    def simple_walk(folder):
        # simple_walk will skip dipping into folders
        # that are not tracked in the repo
        untracked = []
        changed = []

        for f in os.listdir(folder):
            if f == META_DIR:
                continue
            basename = os.path.basename(folder)
            if basename == '.' or basename == '':
                relpath = f
            else:
                relpath = os.path.join(folder, f)
            if relpath in tracked:
                if os.path.isdir(f):
                    _untracked, _changed = simple_walk(relpath)
                    untracked.extend(_untracked)
                    changed.extend(_changed)
                else:
                    fblob = tracked[relpath]
                    # compare the file modified time and its metadata blob modified time
                    curr_mtime = os.path.getmtime(relpath)
                    last_mtime = os.path.getmtime(os.path.join(self.path_objs, fblob.hv))
                    if curr_mtime > last_mtime:
                        # only load the file when its modified time is greater
                        # than the metadata modified time
                        fblob._load()
                        flag = False
                        # compare chunk hash
                        for (offset, chunk) in util.each_chunk2(relpath, unit):
                            if util.sha1(chunk) != fblob.entries[offset].hv:
                                flag = True
                                break
                        if flag:
                            changed.append(relpath)
            else:
                if os.path.isdir(relpath):
                    relpath = os.path.join(relpath, '')
                untracked.append(relpath)

        return untracked, changed

    if not self.check_sanity():
        dbg.err("this is not a metasync repo")
        return False

    # switch to metasync repo root folder
    os.chdir(self.path_root)

    # compare the head and master history
    head_history = self.get_history()
    master_history = self.get_history(True)
    head_diverge = 0
    for head in head_history:
        if (head in master_history):
            break
        head_diverge += 1
    if head_diverge == len(head_history):
        master_diverge = len(master_history)
    else:
        master_diverge = master_history.index(head_history[head_diverge])

    if head_diverge == 0 and master_diverge == 0:
        print "\nYour branch is up-to-date with master."
    elif head_diverge == 0:
        print "\nYour branch is behind master by %d commit(s)." % master_diverge
    elif master_diverge == 0:
        print "\nYour branch is ahead of master by %d commit(s)." % head_diverge
    else:
        print "\nYour branch and master have diverged,"
        print "and have %d and %d different commits each, respectively" % (head_diverge, master_diverge)

    root = self.get_root_blob()
    tracked = {}
    for (path, blob) in root.walk():
        tracked[path] = blob

    untracked, changed = simple_walk('.')
    if changed:
        print("\nChanges not checked in:")
        for f in changed:
            print("\033[31m\tmodified: %s\033[m" % f)

    if untracked:
        print("\nUntracked files:")
        for f in untracked:
            print("\033[31m\t%s\033[m" % f)

    return True

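# Toy walk-through (hashes are made up) of the divergence count in
# cmd_status(): one local commit off master and one remote commit off head
# yields the "have diverged" message with counts (1, 1).
head_history = ["h3", "h2", "h1"]
master_history = ["m1", "h2", "h1"]
head_diverge = 0
for h in head_history:
    if h in master_history:
        break
    head_diverge += 1
master_diverge = master_history.index(head_history[head_diverge])
assert (head_diverge, master_diverge) == (1, 1)
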
def cmd_push(metasync, args, opts):
    "push changes to master"

    if not metasync.cmd_push():
        dbg.err("Can't push")
        return -1

def cmd_daemon(metasync, args, opts):
    "invoke a daemon (and wait) - currently disabled"

    if (opts.debug):
        daemon.start(metasync, args, opts)
    else:
        dbg.err("Currently the daemon is supported only in debug mode.")

def cmd_update(metasync, args, opts):
    "update fetched changes into the local filesystem (e.g., metasync update)"

    if not metasync.cmd_update():
        dbg.err("Can't update")
        return -1

def cmd_init(self, namespace, backend=None, nreplicas=None, encrypt_key=None):
    # already initialized?
    if self.check_sanity():
        dbg.err("already initialized %s (%s)" \
                % (self.path_root, self.namespace))
        return False

    os.mkdir(self.path_meta)
    os.mkdir(self.path_objs)

    # build config opts
    conf = util.new_config()

    # core: unique/permanent info about local machine (often called client)
    #   NOTE. not sure if encryption_key should be in core, or unchangeable
    conf.add_section('core')
    conf.set('core', 'namespace', namespace)
    conf.set('core', 'clientid', util.gen_uuid())
    conf.set('core', 'encryptkey', _get_conf_encryptkey(encrypt_key))

    # backend: info about sync service providers
    # XXX: Error handling
    conf.add_section('backend')
    try:
        services = _get_conf_services(backend)
        conf.set('backend', 'services', services)
        conf.set('backend', 'nreplicas',
                 _get_conf_nreplicas(nreplicas, len(services.split(","))))
    except:
        pass

    # flush
    with open(self.path_conf, "w") as fd:
        conf.write(fd)

    try:
        self._load()
    except NameError:
        shutil.rmtree(self.path_meta)
        return False

    # put config into remote
    conf.remove_option('core', 'clientid')
    conf.remove_option('core', 'encryptkey')

    with io.BytesIO() as out:
        conf.write(out)
        val = out.getvalue()
        configname = util.sha1(val)
        self._put_all_content(val,
                              self.get_remote_path("configs/%s" % configname[:6]),
                              True)

    # temporary --- move this to pPaxos
    # self._put_all_content(configname[:6], self.get_remote_path("config"), True)

    # Format for master: headhash.config[:6].version
    prev_master = "." + configname[:6] + ".0"

    # do we need both? or shall we put them into a file together.
    with open(self.get_head(), "w") as f:
        f.write(prev_master)
    with open(self.get_prev(), "w") as f:
        f.write(prev_master)

    self._put_all_dir(self.get_remote_path("objects"))
    # change to put_content
    self._put_all(self.get_head(), self.get_remote_path(self.get_head_name()))
    self._put_all(self.get_prev(), self.get_remote_path(self.get_prev_name()))

    from paxos import Proposer
    self.proposer = Proposer(None, self.services, self.get_pPaxos_path(prev_master))
    self._join()

    return True

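# Illustration of the master/head string layout used above,
# "<root hash>.<config hash prefix>.<version>"; the helper is hypothetical
# (cmd_clone() below does the equivalent split inline).
def _split_master(value):
    root_hv, config6, version = value.split(".")
    return root_hv, config6, int(version)

# e.g. _split_master(".1a2b3c.0") -> ("", "1a2b3c", 0) right after init
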
def cmd_clone(self, namespace, backend=None, encrypt_key=None):
    # if wrong target
    if self.check_sanity():
        return False

    # reset all the path by including the namespace
    self.path_root = os.path.join(self.path_root, namespace)
    self.path_meta = os.path.join(self.path_root, META_DIR)
    self.path_conf = self.get_path("config")
    self.path_objs = self.get_path("objects")
    #self.path_head_history = self.get_path("head_history")

    if os.path.exists(self.path_root):
        dbg.err("%s already exists." % self.path_root)
        return False

    if backend is None:
        print "input one of the storage backends, (e.g., dropbox,google,box)"
        print " for testing, use disk@/path (e.g., disk@/tmp)"
        backend = raw_input("> ")

    srv = services.factory(backend)

    self.namespace = namespace

    # create repo directory
    os.mkdir(self.path_root)
    os.mkdir(self.path_meta)
    os.mkdir(self.path_objs)

    curmaster = self.get_uptodate_master(False, srv)
    sp = curmaster.split(".")
    master = sp[0]
    seed = sp[1]

    seed = srv.get(self.get_remote_path("configs/%s" % seed))
    conf = util.loads_config(seed)

    # setup client specific info
    conf.set('core', 'clientid', util.gen_uuid())
    conf.set('core', 'encryptkey', _get_conf_encryptkey(encrypt_key))

    with open(self.path_conf, "w") as fd:
        conf.write(fd)

    self._load()

    beg = time.time()
    self.bstore_download()
    self._join()

    with open(self.get_head(), "w") as f:
        f.write(curmaster)
    with open(self.get_prev(), "w") as f:
        f.write(curmaster)

    # send my head to remote
    self._put_all(self.get_head(), self.get_remote_path(self.get_head_name()))
    self._put_all(self.get_prev(), self.get_remote_path(self.get_prev_name()))
    self._join()

    if (master):
        ret = self.restore_from_master()

    end = time.time()
    dbg.dbg("clone: %ss" % (end - beg))

    return True
