Example #1
    def cmd_push(self):
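        # Push: try to win the Paxos round keyed by the previously agreed
        # value; if another client's head was chosen instead, the local copy
        # is stale and the user must fetch/update before pushing again.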
        prev = self.get_prev_value()
        newvalue = self.get_head_and_config()
        val = self.propose_value(prev, newvalue)
        # print("val: ", val, "newval: ", newvalue, "prev: ", prev)
        if (val != newvalue):
            dbg.err("You should fetch first")
            return False

        # with open(self.path_master) as f:
        #     master_head = f.read().strip()
        # with open(self.get_head()) as f:
        #     head = f.read().strip()
        # if(len(master_head) > 0):
        #     head_history = self.get_history()
        #     if(not master_head in head_history):
        #         dbg.err("You should update first")
        #         self.unlock_master()
        #         return False
        # check master is ancestor of the head

        shutil.copyfile(self.get_head(), self.get_prev())
        self._update_all(self.get_prev(),
                         self.get_remote_path(self.get_prev_name()))
        from paxos import Proposer
        self.proposer = Proposer(None, self.services,
                                 self.get_pPaxos_path(newvalue))
        self._join()
        return True
Example #2
 def __init__(self, config, globalConfig, client_sock=None):
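     # each node holds both Paxos roles: it can propose values (Proposer)
     # and accept proposals from other nodes (Acceptor)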
     self.config = config
     self.globalConfig = globalConfig
     self.init_balance = 100
     self.set = []
     self.blockchain = []
     self.proposer = Proposer(self.config, globalConfig)
     self.acceptor = Acceptor(self.config)
     self.inPaxos = False
Example #3
def test_paxos_latency(metasync, opts):
    lock = "locktest/ltest_latency"
    import services
    srvs = ["onedrive"]
    srvs_instance = map(services.factory, srvs)

    for srv in srvs_instance:
        if not srv.exists(lock):
            srv.put(lock, '')

    from paxos import Proposer
    proposer = Proposer("1", srvs_instance, lock)
    val = proposer.propose("1")
    assert val == "1"
    proposer.join()
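
The paxos tests in these examples all follow the same pattern: make sure the consensus file exists on every backend, build a Proposer from a client id, the list of backends, and that path, call propose(), then join() to drain outstanding service requests. Below is a minimal sketch of that pattern, assuming only the paxos.Proposer API shown in these examples; the helper name and its arguments are illustrative.

from paxos import Proposer

def run_round(clientid, srvs_instance, lock, value):
    # make sure the consensus file exists on every backend
    for srv in srvs_instance:
        if not srv.exists(lock):
            srv.put(lock, '')
    proposer = Proposer(clientid, srvs_instance, lock)
    agreed = proposer.propose(value)  # the value this Paxos round agreed on
    proposer.join()                   # wait for backend requests to finish
    return agreed == value            # True iff our proposal was chosen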
Example #4
    def cmd_push(self):
        prev = self.get_prev_value()
        newvalue = self.get_head_and_config()
        val = self.propose_value(prev, newvalue)
        if(val != newvalue):
            dbg.err("You should fetch first")
            return False

        # with open(self.path_master) as f:
        #     master_head = f.read().strip()
        # with open(self.get_head()) as f:
        #     head = f.read().strip()
        # if(len(master_head) > 0):
        #     head_history = self.get_history()
        #     if(not master_head in head_history):
        #         dbg.err("You should update first")
        #         self.unlock_master()
        #         return False
        # check master is ancestor of the head

        shutil.copyfile(self.get_head(), self.get_prev())
        self._update_all(self.get_prev(), self.get_remote_path(self.get_prev_name()))
        from paxos import Proposer
        self.proposer = Proposer(None, self.services, self.get_pPaxos_path(newvalue))
        self._join()
        return True
Example #5
 def __init__(self, pid):
     super(PaxosProcess, self).__init__(pid)
     self.process_count = 0
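     # role objects are created lazily, one per consensus instance: indexing
     # an unseen key builds a fresh Proposer/Acceptor/Learner on demand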
     self.proposers = defaultdict(lambda: Proposer(self.process_count))
     self.acceptors = defaultdict(lambda: Acceptor(self.process_count))
     self.learners = defaultdict(lambda: Learner(self.process_count))
     self.client_requests = []
     self.internal_requests = []
Example #6
def test_paxos(metasync, opts):
    "test paxos with disk_api"
    lock = 'locktest/ltest'

    test_init(metasync, opts)
    srvs = metasync.services

    for srv in srvs:
        if not srv.exists(lock):
            srv.put(lock, '')

    from paxos import Proposer

    proposer = Proposer("1", srvs, lock)
    val = proposer.propose("1")
    assert val == "1"
    proposer.join()
Example #7
def test_paxos_services(metasync, opts):
    "test paxos with services"

    # init tmp repo to play with
    #test_init(metasync, opts)

    # init lock primitives
    lock = 'locktest/ltest2'
    targets = ["google", "box", "dropbox"]
    srvs = map(services.factory, targets)
    for srv in srvs:
        if not srv.exists(lock):
            srv.put(lock, '')

    from paxos import Proposer

    proposer = Proposer("1", srvs, lock)
    val = proposer.propose("1")
    assert val == "1"
    proposer.join()
Example #8
 def propose_value(self, prev, newvalue):
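     # the consensus round lives at pPaxos/<prev>, i.e. it is named by the
     # previously agreed value, so clients starting from the same prev
     # compete in the same round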
     from paxos import Proposer
     self.proposer = Proposer(self.clientid, self.services,
                              self.get_remote_path("pPaxos/" + prev))
     return self.proposer.propose(newvalue)
Example #9
    def cmd_init(self,
                 namespace,
                 backend=None,
                 nreplicas=None,
                 encrypt_key=None):
        # already initialized?
        if self.check_sanity():
            dbg.err("already initialized %s (%s)" \
                     % (self.path_root, self.namespace))
            return False

        os.mkdir(self.path_meta)
        os.mkdir(self.path_objs)

        # build config opts
        conf = util.new_config()

        # core: unique/permanent info about local machine (often called client)
        #   NOTE. not sure if encryption_key should be in core, or unchangeable
        conf.add_section('core')
        conf.set('core', 'namespace', namespace)
        conf.set('core', 'clientid', util.gen_uuid())
        conf.set('core', 'encryptkey', _get_conf_encryptkey(encrypt_key))

        # backend: info about sync service providers
        # XXX: Error handling
        conf.add_section('backend')
        try:
            services = _get_conf_services(backend)
            conf.set('backend', 'services', services)
            conf.set('backend', 'nreplicas',
                     _get_conf_nreplicas(nreplicas, len(services.split(","))))
        except:
            pass

        # flush
        with open(self.path_conf, "w") as fd:
            conf.write(fd)

        try:
            self._load()
        except NameError:
            shutil.rmtree(self.path_meta)
            return False

        # put config into remote
        conf.remove_option('core', 'clientid')
        conf.remove_option('core', 'encryptkey')

        with io.BytesIO() as out:
            conf.write(out)
            val = out.getvalue()
            configname = util.sha1(val)
            self._put_all_content(
                val, self.get_remote_path("configs/%s" % configname[:6]), True)

            #temporary --- move this to pPaxos
            #self._put_all_content(configname[:6], self.get_remote_path("config"), True)

        # Format for master: headhash.config[:6].version
        prev_master = "." + configname[:6] + ".0"
        # do we need both? or shall we put them into a file together.
        with open(self.get_head(), "w") as f:
            f.write(prev_master)
        with open(self.get_prev(), "w") as f:
            f.write(prev_master)
        self._put_all_dir(self.get_remote_path("objects"))
        # change to put_content
        self._put_all(self.get_head(),
                      self.get_remote_path(self.get_head_name()))
        self._put_all(self.get_prev(),
                      self.get_remote_path(self.get_prev_name()))

        from paxos import Proposer
        self.proposer = Proposer(None, self.services,
                                 self.get_pPaxos_path(prev_master))
        self._join()

        return True
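
cmd_init seeds both the head and the prev pointer with an empty master string of the form headhash.confighash[:6].version; the get_head_value, get_config_hash, and get_next_version helpers shown later split it on dots. A tiny illustration of that format, using a hypothetical config-hash prefix:

prev_master = "." + "a1b2c3" + ".0"   # "<headhash>.<confighash[:6]>.<version>"
headhash, confighash, version = prev_master.split(".")
# a freshly initialized repo has an empty head hash and version 0
assert headhash == "" and confighash == "a1b2c3" and int(version) == 0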
Example #10
 def propose_value(self, prev, newvalue):
     from paxos import Proposer
     self.proposer = Proposer(self.clientid, self.services, self.get_remote_path("pPaxos/"+prev))
     return self.proposer.propose(newvalue)
Example #11
class MetaSync:
    def __init__(self, root, opts=None):
        #
        #  repo/.metasync/
        #  ^    ^
        #  |    +-- meta
        #  +-- root

        # useful path info
        self.path_root   = self._find_root(root)
        self.path_meta   = os.path.join(self.path_root, META_DIR)
        self.path_conf   = self.get_path("config")
        self.path_objs   = self.get_path("objects")
        self.path_master = self.get_path("master")
        self.path_head_history = self.get_path("head_history")
        self.options     = opts

        # local blob store
        self.blobstore   = BlobStore2(self) #BlobStore(self.path_objs)

        # load on demand
        self.config      = None
        self.srvmap      = {}
        self.scheduler   = None
        self.translators = []
        self.mapping     = None

        # post init
        self._load()

    def _find_root(self, curpath):
        # find repo
        curpath = os.path.abspath(curpath)
        orgpath = curpath
        auth_dir = os.path.join(os.path.expanduser("~"), ".metasync")
        while True:
            path = os.path.join(curpath, META_DIR)
            if(path != auth_dir and os.path.exists(os.path.join(curpath, META_DIR))): return curpath
            sp = os.path.split(curpath)
            if(sp[1] == ""): break
            curpath = sp[0] 
        return orgpath


    @property
    def services(self):
        return self.srvmap.values()

    # load member variables from config
    def _load(self):
        if not self.check_sanity():
            return

        if(not os.path.exists(AUTH_DIR)): os.mkdir(AUTH_DIR)

        # load config
        self.config    = util.load_config(self.path_conf)
        self.namespace = self.config.get("core", "namespace")
        self.clientid  = self.config.get("core", "clientid")

        # load services from config
        self.srvmap = {}
        for tok in self.config.get("backend", "services").split(","):
            srv = services.factory(tok)
            self.srvmap[srv.sid()] = srv

        self.nreplicas = int(self.config.get("backend", "nreplicas"))
            
        nthreads = self.options.nthreads if self.options is not None else 2
        self.scheduler = Scheduler(self.services, (nthreads+1)*len(self.srvmap))

        # load translator pipe
        if self.is_encypted():
            self.translators.append(translators.TrEncrypt(self))

        # TODO. for integrity option
        # if self.is_signed():
        #     self.translators.append(TrSigned(self))

        beg = time.time()
        if(os.path.exists(self.get_path("mapping.pcl"))):
            with open(self.get_path("mapping.pcl")) as f:
                self.mapping = pickle.load(f)
        else:
            mapconfig = []
            for srv in self.services:
                mapconfig.append((srv.sid(), srv.info_storage()/GB))
            hspacesum = sum(map(lambda x:x[1], mapconfig))
            hspace = max(hspacesum+1, 1024)
            self.mapping = DetMap2(mapconfig, hspace=hspace, replica=self.nreplicas)
            self.mapping.pack()
            with open(self.get_path("mapping.pcl"), "w") as f:
                pickle.dump(self.mapping, f)
        end = time.time()
        dbg.time("mapping init %s" % (end-beg))
        dbg.dbg("head: %s", self.get_head_name())

    def cmd_reconfigure(self, backends, replica):
        srvmap = {}
        for tok in backends.split(","):
            srv = services.factory(tok)
            srvmap[srv.sid()] = srv
        lst_services = srvmap.values()
        mapconfig = []
        lock_dic = {}
        for srv in lst_services:
            mapconfig.append((srv.sid(), srv.info_storage()/GB))
        for srv in srvmap:
            lock_dic[srv] = threading.Lock()
            if srv not in self.srvmap:
                srvmap[srv].putdir(self.get_remote_path("objects"))
        for srv in self.srvmap:
            if srv not in lock_dic: 
                lock_dic[srv] = threading.Lock()

        beg = time.time()
        self.mapping.reconfig(mapconfig, eval(replica))
        end = time.time()
        dbg.info("remap: %.3fs" % (end-beg))
        beg = time.time()
        lst_objs = self.blobstore.list()
        added, removed = self.mapping.get_remapping(lst_objs)
        nthreads = self.options.nthreads if self.options is not None else 2

        #REFACTOR 
        def __put_next(srv, lst, lock):
            dbg.job("submitted to: %s" % srv)
            while True:
                lock.acquire()
                if(len(lst) == 0):
                    lock.release()
                    break
                next = lst.pop()
                lock.release()
                if next is not None:
                    with open(self.get_local_obj_path(next), "rb") as f:
                        blob = f.read()
                        for tr in self.translators:
                            blob = tr.put(blob)
                        # XXX HACK
                        backoff = 0.5
                        remote_path = self.get_remote_obj_path(next)
                        while not srv.put(remote_path, blob):
                            time.sleep(backoff)
                            backoff *= 2

        def __rm_next(srv, lst, lock):
            dbg.job("submitted to: %s" % srv)
            while True:
                lock.acquire()
                if(len(lst) == 0):
                    lock.release()
                    break
                next = lst.pop()
                lock.release()
                if next is not None:
                    remote_path = self.get_remote_obj_path(next)
                    srv.rm(remote_path)
        cnt_added = 0 
        for srv in added:
            if(len(added[srv]) == 0): continue
            cnt_added += len(added[srv])
            for i in range(nthreads):
                self.scheduler.submit(srvmap[srv], False, __put_next, added[srv], lock_dic[srv])   
        self._join()

        end = time.time()
        dbg.info("remap put: %.3fs" % (end-beg))

        beg = time.time()
        cnt_removed = 0
        for srv in removed:
            if(len(removed[srv]) == 0): continue
            cnt_removed += len(removed[srv])
            for i in range(nthreads):
                self.scheduler.submit(self.srvmap[srv], False, __rm_next, removed[srv], lock_dic[srv])   
        self._join()
        end = time.time()
        dbg.info("remap rm: %.3fs" % (end-beg))
        dbg.info("added %d, removed %d" % (cnt_added, cnt_removed))


    # config-related parser
    def is_encypted(self):
        key = self.config.get('core', 'encryptkey').strip()
        return key != ""

    # handling dir/path names
    def get_path(self, path):
        return os.path.join(self.path_meta, path)

    def get_head(self):
        return self.get_path(self.get_head_name())

    def get_head_name(self):
        return "head_%s" % self.get_client_id()

    def get_head_value(self):
        with open(self.get_head()) as f:
            return f.read().strip().split(".")[0]
        return None

    def get_head_and_config(self):
        with open(self.get_head()) as f:
            return f.read().strip()
        return None

    def get_prev(self):
        return self.get_path(self.get_prev_name())

    def get_prev_name(self):
        return "prev_%s" % self.get_client_id()

    def get_prev_value(self):
        with open(self.get_prev()) as f:
            return f.read().strip()
        return None

    def get_next_version(self):
        with open(self.get_prev()) as f:
            return int(f.read().strip().split(".")[2]) + 1
        return None

    #XXX: Cache?
    def get_config_hash(self):
        with open(self.get_head()) as f:
            return f.read().strip().split(".")[1]
        return None

    def get_client_id(self):
        return self.clientid

    def get_relative_path(self, path):
        return os.path.relpath(os.path.abspath(path), self.path_root)

    def get_local_path(self, *path):
        return os.path.join(self.path_root, *[p.strip("/") for p in path])

    def get_local_obj_path(self, hv):
        return os.path.join(self.path_objs, hv)

    def get_remote_path(self, *path):
        #return os.path.join(self.namespace, *path).rstrip("/")
        return "/".join([self.namespace] + list(path)).rstrip("/").rstrip("\\")

    def get_remote_obj_path(self, *hashes):
        return self.get_remote_path("objects", *hashes)

    def get_root_blob(self):
        return self.blobstore.get_root_blob()

    # check basic sanity of repo's meta info
    def check_sanity(self, whynot=False):
        def __err(why):
            if whynot:
                print >> sys.stderr, why
            return False
        if not os.path.exists(self.path_meta):
            return __err("Can't find the root of repo (%s)" % self.path_meta)
        if not os.path.exists(self.path_conf):
            return __err("Can't find config (%s)" % self.path_conf)
        if not os.path.exists(self.path_objs):
            return __err("Can't find objects store (%s)" % self.path_objs)
        return True

    # schedule-related
    def _put_all_content(self, content, remote_path, serial=False):
        def __put(srv):
            #dbg.job("submitted to: %s" % srv)
            srv.put(remote_path, content)

        # submit jobs
        for srv in self.services:
            self.scheduler.submit(srv, serial, __put)

    def _put_all_dir(self, remote_path):
        # XXX. handle errs
        def __putdir(srv):
            srv.putdir(remote_path)

        # submit jobs
        for srv in self.services:
            self.scheduler.submit(srv, True, __putdir)

    def _put_all(self, path, remote_path):
        # XXX. handle errs
        def __put(srv):
            with open(path, "rb") as f:
                srv.put(remote_path, f.read())

        # submit jobs
        for srv in self.services:
            self.scheduler.submit(srv, True, __put)

    def _update_all(self, path, remote_path):
        # XXX. handle errs
        def __update(srv):
            #dbg.job("submitted to: %s" % srv)
            with open(path, "rb") as f:
                #print 'start to put'
                srv.update(remote_path, f.read())
                #print 'put ends'

        # submit jobs
        for srv in self.services:
            self.scheduler.submit(srv, True, __update)

    def _join(self):
        self.scheduler.join()

    def _get(self, srv, path, remote_path):
        def __get(srv, path, remote_path):
            dbg.job("submitted to: %s (%s)" % (srv, path))
            with open(path,  "wb") as f:
                blob = srv.get(remote_path)
                if(blob is None):
                    time.sleep(1)
                    blob = srv.get(remote_path)
                for tr in reversed(self.translators):
                    blob = tr.get(blob)
                f.write(blob)

        self.scheduler.submit(srv, False, __get, path, remote_path)

    # bstore-related
    def bstore_download(self):
        # TODO, handle when R > 1
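        # fetch every remote object missing from the local blob store,
        # fanning downloads out across services; allset ensures each hash is
        # downloaded only once even when several services host it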
        lst = self.blobstore.list()
        #dbg.dbg("lst files:%s" % lst)

        lock = threading.Lock()
        def __get_next(srv, hash_dic, lock, allset, srvname):
            if(len(hash_dic[srvname]) == 0): return
            while True:
                lock.acquire()
                try:
                    next = hash_dic[srvname].pop()
                    l = len(hash_dic[srvname])
                    if(l%10 == 0):
                        dbg.dbg("%s left %d" % (srvname, l))
                    if(next not in allset):
                        allset.add(next)
                    else:
                        next = None
                except:
                    lock.release()
                    break
                lock.release()
                if(next is not None):
                    remote_path = self.get_remote_obj_path(next)
                    path = os.path.join(self.path_objs, next)
                    with open(path, "wb") as f:
                        backoff = 0.5
                        while True:
                            blob = srv.get(remote_path)
                            if(blob is not None): break
                            dbg.dbg("back off %s" % srvname)
                            time.sleep(backoff)
                            backoff*=2
                            
                        for tr in reversed(self.translators):
                            blob = tr.get(blob)
                        f.write(blob)

        hash_dic = {}
        allset = set([])
        for srv in self.services:
            hash_dic[str(srv)] = []
            srvlist = srv.listdir(self.get_remote_obj_path())
            backoff = 1
            while srvlist is None:
                dbg.dbg("back off - listdir %s" % str(srv))
                time.sleep(backoff)
                srvlist = srv.listdir(self.get_remote_obj_path())

            for hashname in srvlist:
                if(hashname in lst):
                    #dbg.dbg("%s is already in bstore" % hashname)
                    continue
                hash_dic[str(srv)].append(hashname)

        nthreads = self.options.nthreads if self.options is not None else 2
        for srv in self.services:
            dbg.dbg("%s:%d dn" % (str(srv), len(hash_dic[str(srv)])))
            ##HACK
            for i in range(nthreads):
                self.scheduler.submit(srv, False, __get_next, hash_dic, lock, allset, str(srv))

    def bstore_sync_left(self, hashdic):
        cnt = 0
        for i in hashdic:
            cnt += len(hashdic[i])
        if(cnt == 0): return

        def __put_next(srv, lst, lock):
            dbg.job("submitted to: %s" % srv)
            while True:
                lock.acquire()
                if(len(lst) == 0):
                    lock.release()
                    break
                next = lst.pop()
                lock.release()
                if next is not None:
                    with open(self.get_local_obj_path(next), "rb") as f:
                        blob = f.read()
                        for tr in self.translators:
                            blob = tr.put(blob)
                        # XXX HACK
                        backoff = 0.5
                        remote_path = self.get_remote_obj_path(next)
                        while not srv.put(remote_path, blob):
                            time.sleep(backoff)
                            backoff *= 2

        lock_dic = {}
        for i in hashdic:
            lock_dic[i] = threading.Lock()

        nthreads = self.options.nthreads if self.options is not None else 2
        for srv in hashdic:
            for i in range(nthreads):
                self.scheduler.submit(self.srvmap[srv], False, __put_next, hashdic[srv], lock_dic[srv])   

        self._join()

    #XXX: it needs to return after one set is put, and continue on replication. 
    def bstore_sync(self, hashnames):
        dbg.dbg("need to sync: %s..@%d" % (hashnames[0], len(hashnames)))
        def __put_next(srv, hashdic, hashdic_left, allset, key, lock):
            dbg.job("submitted to: %s" % srv)
            while True:
                lock.acquire()
                if(len(hashdic[key]) == 0 or len(allset) == 0):
                    lock.release()
                    break

                next = hashdic[key].pop()
                if(next in allset):
                    allset.remove(next)
                else:
                    hashdic_left[key].append(next)
                    next = None
                lock.release()
                if next is not None:
                    with open(self.get_local_obj_path(next), "rb") as f:
                        blob = f.read()
                        for tr in self.translators:
                            blob = tr.put(blob)
                        # XXX HACK
                        backoff = 0.5
                        remote_path = self.get_remote_obj_path(next)
                        while not srv.put(remote_path, blob):
                            dbg.dbg("backoff %s" % srv)
                            time.sleep(backoff)
                            backoff *= 2

        nthreads = self.options.nthreads if self.options is not None else 2
        hashdic = {}
        hashdic_left = {}
        allset = set()
        lock = threading.Lock()
        for srv in self.srvmap:
            hashdic[srv] = []
            hashdic_left[srv] = []

        for hashname in hashnames:
            allset.add(hashname)
            for i in self.mapping.get_mapping(hashname):
                hashdic[i].append(hashname)

        for srv in hashdic:
            for i in range(nthreads):
                self.scheduler.submit(self.srvmap[srv], False, __put_next, hashdic, hashdic_left, allset, srv, lock)   

        self._join()
        return hashdic_left        

    # iterate bstore
    def bstore_iter(self):
        for root, dirs, files in os.walk(self.path_objs):
            for name in files:
                yield name

    def bstore_iter_remote(self, srv):
        assert srv in self.services

        # NOTE. at some point, we need cascaded directory hierarchy
        for obj in srv.listdir(self.get_remote_obj_path()):
            yield obj

    #XXX. update only changed files (SY)
    def restore_from_master(self):
        root = self.get_root_blob()
        dbg.dbg("restore")
        for name, blob in root.walk():
            pn = os.path.join(self.path_root, name)
            if blob.thv == "F":
                content = blob.read()
                util.write_file(pn, content.getvalue())
                content.close()
            if blob.thv == "m":
                content = blob.read()
                util.write_file(pn, content)
            elif blob.thv == "D" or blob.thv == "M":
                try:
                    os.mkdir(pn)
                except:
                    pass
        return True

    def propose_value(self, prev, newvalue):
        from paxos import Proposer
        self.proposer = Proposer(self.clientid, self.services, self.get_remote_path("pPaxos/"+prev))
        return self.proposer.propose(newvalue)

    # need to truncate if history is too long.
    def get_history(self, is_master=False): 
        pn = self.path_master_history if is_master else self.path_head_history
        content = util.read_file(pn).strip()
        if content:
            history = content.split("\n")
            history.reverse()
        else:
            history = []
        
        return history

    def get_common_ancestor(self, head_history, master_history, known_common_history=None):
        # change to use known_common_history
        for head in head_history:
            if(head in master_history):
                return head
        return None

    def try_merge(self, head_history, master_history):
        # this need to be fixed.
        dbg.dbg("Trying to merge")
        # we may need to cache the last branched point
        common = self.get_common_ancestor(head_history, master_history)
        dbg.dbg("%s %s %s", head_history[0], master_history[0], common)
        common = self.blobstore.get_blob(common, "D")
        head = self.get_root_blob()
        master = self.blobstore.get_blob(master_history[0], "D")

        added1 = head.diff(common) 
        added2 = master.diff(common)

        def intersect(a, b):
            return list(set(a) & set(b))

        if(len(intersect(added1.keys(), added2.keys())) != 0):
            dbg.err("both modified--we need to handle it")
            return False
        for i in added2.keys():
            path = os.path.join(self.path_root, i)
            dirblob = self.blobstore.load_dir(os.path.dirname(path), dirty=True)
            dirblob.add(os.path.basename(path), added2[i], dirty=False)

        # HACK, need to go through all the non-overlapped history.
        self.append_history(master.hv)
        head.store()
        self.append_history(head.hv)
        # HACK, need to be changed
        newblobs = self.blobstore.get_added_blobs() 

        # push new blobs remotely
        self.bstore_sync(newblobs)
        self._join()

        return True

    def get_uptodate_master(self, includeself=True, srv=None):
        # copy all the heads. --- it should have version number. or something to compare against each other.
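        # pull the prev_<clientid> pointers stored on the backend and return
        # the one with the largest version field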
        if srv is None:
            srv = self.services[0]
        prev_clients = filter(lambda x:x.startswith("prev_"), srv.listdir(self.get_remote_path("")))
        pointers = set()
        for prev in prev_clients: 
            if not includeself or not prev.endswith(self.clientid):
                with open(self.get_path(prev), "w") as f:
                    pointer = srv.get(self.get_remote_path(prev))
                    pointers.add(pointer)
                    f.write(pointer)
        if includeself:
            pointers.add(self.get_prev_value()) 

        return max(pointers, key=lambda x:int(x.split(".")[2]))

    def check_master_uptodate(self):
        srv = self.services[0]
        remote_master = srv.get(self.get_remote_path("master"))
        with open(self.path_master) as f:
            master_head = f.read().strip()
        if(master_head != remote_master): return False
        return True

    def cmd_poll(self):
        srv = self.services[0]
        srv.poll(self.namespace)

    #
    # end-user's interfaces (starting with cmd_ prefix)
    #   NOTE. explicitly return True/False to indicate status of 'cmd'
    #

    def cmd_share(self, target_email):
        if not self.check_sanity():
            dbg.err("this is not metasync repo")
            return False

        for srv in self.services:
            srv.share(self.namespace, target_email)

    def cmd_diff(self):
        # work only for 1-level directory
        # need to add diff for file
        if not self.check_sanity():
            dbg.err("this is not metasync repo")
            return False
        root = self.get_root_blob()
        added = []
        removed = []
        files = os.listdir(".")
        for f in files:
            if(f == ".metasync"): continue
            if("/"+f not in root.files):
                added.append(f)

        for f in root.files:
            if(f[1:] not in files):
                removed.append(f[1:])

        for f in added:
            print("+++ %s" % f)

        for f in removed:
            print("--- %s" % f)

    def cmd_mv(self, src_pn, dst_pn):
        if not self.check_sanity():
            dbg.err("it's not a metasync repo.")
            return False
        src_pn = os.path.abspath(src_pn)
        dst_pn = os.path.abspath(dst_pn)

        #TODO: check src_pn exists
        beg = time.time()
        try:
            dirname = os.path.dirname(src_pn)
            dirblob = self.blobstore.load_dir(dirname, False, dirty=True)
            if(dirblob is None):
                dbg.err("%s does not exist" % src_pn)
                return False
        except NotTrackedException as e:
            dbg.err(str(e))
            return False

        fname = os.path.basename(src_pn)
        if(not fname in dirblob): 
            dbg.err("%s does not exist" % pn)
            return False
        fblob = dirblob[fname]
        dirblob.rm(fname)

        dst_dirname = os.path.dirname(dst_pn)
        if(dirname != dst_dirname):
            dirblob = self.blobstore.load_dir(dst_dirname, True, dirty=True)
            assert dirblob is not None

        dst_fname = os.path.basename(dst_pn)
        dirblob.add(dst_fname, fblob, dirty=False)

        root = self.get_root_blob()
        root.store()
        newblobs = self.blobstore.get_added_blobs()

        util.write_file(self.get_head(), root.hv)
        self.append_history(root.hv)

        end = time.time()
        dbg.time("local write: %f" % (end-beg))

        # push new blobs remotely
        self.bstore_sync(newblobs)
        self._put_all(self.get_head(), self.get_remote_path(self.get_head_name()))

        end = time.time()
        dbg.time("remote write: %f" % (end-beg))

        # move the file
        shutil.move(src_pn, dst_pn)
        self._join()

        return True 

    def cmd_peek(self):
        root = self.get_root_blob()
        for i in root.walk():
            print(i)
        # print("hash: %s" % root.hash_head)
        # print(root.dump_info())
        # with open(self.path_master) as f:
        #     master_head = f.read().strip()
        # with open(self.get_head()) as f:
        #     head = f.read().strip()
        # print("head_history %s" % ",".join(self.get_history(head)))
        # print("master_history %s" %  ",".join(self.get_history(master_head)))

    def cmd_fetch(self):
        if not self.check_sanity():
            dbg.err("it's not a metasync repo.")
            return False

        # TODO: change it into comparing between masters
        self.bstore_download()

        self._join()
        return True

    def update_changed(self, head, master):
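        # materialize files from the master tree that are new or whose blob
        # hash differs from the one in the current head tree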
        def _file_create(blob, pn): 
            if(blob.thv == "D" or blob.thv == "M"):
                util.mkdirs(pn)
                for i in blob.entries:
                    _file_create(blob[i], os.path.join(pn, i))
            elif(blob.thv == "F"):
                content = blob.read()
                util.write_file(pn, content.getvalue())
                content.close()
                # touch metadata blob (for cmd_status)
                os.utime(os.path.join(self.path_objs, blob.hv), None)
            elif(blob.thv == "m"):
                content = blob.read()
                util.write_file(pn, content)
                # touch metadata blob (for cmd_status)
                os.utime(os.path.join(self.path_objs, blob.hv), None)
            else:
                assert False

        def _update(old_dirblob, new_dirblob, path):
            for fname in new_dirblob.entries:
                blob = new_dirblob[fname]
                if(fname not in old_dirblob): 
                    _file_create(blob, os.path.join(path, fname))
                elif(blob.hv != old_dirblob[fname].hv):
                    if(blob.thv == "D"):
                        _update(old_dirblob[fname], blob, os.path.join(path, fname))
                    elif(blob.thv == "F"): 
                        _file_create(blob, os.path.join(path, fname))
                    else:
                        print(blob.thv)
                        assert False

        headblob = self.blobstore.get_blob(head, "D")
        masterblob = self.blobstore.get_blob(master, "D")
        _update(headblob, masterblob, self.path_root)

    def update_head_and_prev(self, master):
        with open(self.get_prev(), "w") as f:
            f.write(master)
        with open(self.get_head(), "w") as f:
            f.write(master)

    def cmd_update(self):
        master = self.get_uptodate_master()
        # already up-to-date
        prev = self.get_prev_value()
        if (master == prev):
            return True

        head = self.get_head_and_config()
        # XXX: need to check if non-checked in but modified files.
        if (head == prev):
            self.update_changed(head.split(".")[0], master.split(".")[0])
        else:
            ### need to merge
            raise Exception('Merge required')

        self.update_head_and_prev(master)
        self.blobstore.rootblob = None
        dbg.info("update done %s" % time.ctime())
        return True

    #XXX: Seungyeop is working on it.
    def cmd_clone(self, namespace, backend=None, encrypt_key=None):
        # if wrong target
        if self.check_sanity():
            return False

        # reset all the path by including the namespace
        self.path_root   = os.path.join(self.path_root, namespace)
        self.path_meta   = os.path.join(self.path_root, META_DIR)
        self.path_conf   = self.get_path("config")
        self.path_objs   = self.get_path("objects")
        #self.path_head_history = self.get_path("head_history")

        if os.path.exists(self.path_root):
            dbg.err("%s already exists." % self.path_root)
            return False

        if backend is None:
            print "input one of the storage backends, (e.g., dropbox,google,box)"
            print "  for testing, use disk@/path (e.g., disk@/tmp)"
            backend = raw_input("> ")

        srv  = services.factory(backend)
        self.namespace = namespace

        # create repo directory
        os.mkdir(self.path_root)
        os.mkdir(self.path_meta)
        os.mkdir(self.path_objs)

        curmaster = self.get_uptodate_master(False, srv)
        sp = curmaster.split(".")
        master = sp[0]
        seed = sp[1]
        seed = srv.get(self.get_remote_path("configs/%s" % seed))
        conf = util.loads_config(seed)

        # setup client specific info
        conf.set('core', 'clientid'  , util.gen_uuid())
        conf.set('core', 'encryptkey', _get_conf_encryptkey(encrypt_key))

        with open(self.path_conf, "w") as fd:
            conf.write(fd)

        self._load()
        beg = time.time()
        self.bstore_download()
        self._join()

        with open(self.get_head(), "w") as f:
            f.write(curmaster)
        with open(self.get_prev(), "w") as f:
            f.write(curmaster)

        # send my head to remote
        self._put_all(self.get_head(), self.get_remote_path(self.get_head_name()))
        self._put_all(self.get_prev(), self.get_remote_path(self.get_prev_name()))
        self._join()

        if (master):
            ret = self.restore_from_master()
        end = time.time()
        dbg.dbg("clone: %ss" % (end-beg))
        return True

    def cmd_init(self, namespace, backend=None, nreplicas=None, encrypt_key=None):
        # already initialized?
        if self.check_sanity():
            dbg.err("already initialized %s (%s)" \
                     % (self.path_root, self.namespace))
            return False

        os.mkdir(self.path_meta)
        os.mkdir(self.path_objs)

        # build config opts
        conf = util.new_config()

        # core: unique/permanent info about local machine (often called client)
        #   NOTE. not sure if encryption_key should be in core, or unchangeable
        conf.add_section('core')
        conf.set('core', 'namespace' , namespace)
        conf.set('core', 'clientid'  , util.gen_uuid())
        conf.set('core', 'encryptkey', _get_conf_encryptkey(encrypt_key))

        # backend: info about sync service providers
        # XXX: Error handling
        conf.add_section('backend')
        try:
            services = _get_conf_services(backend)
            conf.set('backend', 'services' , services)
            conf.set('backend', 'nreplicas', _get_conf_nreplicas(nreplicas, len(services.split(","))))
        except:
            pass

        # flush
        with open(self.path_conf, "w") as fd:
            conf.write(fd)

        try: 
            self._load()
        except NameError:
            shutil.rmtree(self.path_meta)
            return False

        # put config into remote
        conf.remove_option('core','clientid')
        conf.remove_option('core','encryptkey')

        with io.BytesIO() as out:
            conf.write(out)
            val = out.getvalue()
            configname = util.sha1(val) 
            self._put_all_content(val, self.get_remote_path("configs/%s" % configname[:6]), True)

            #temporary --- move this to pPaxos
            #self._put_all_content(configname[:6], self.get_remote_path("config"), True)

        # Format for master: headhash.config[:6].version
        prev_master = "." + configname[:6] + ".0"
        # do we need both? or shall we put them into a file together.
        with open(self.get_head(), "w") as f:
            f.write(prev_master)
        with open(self.get_prev(), "w") as f:
            f.write(prev_master)
        self._put_all_dir(self.get_remote_path("objects"))
        # change to put_content
        self._put_all(self.get_head() , self.get_remote_path(self.get_head_name()))
        self._put_all(self.get_prev() , self.get_remote_path(self.get_prev_name()))

        from paxos import Proposer
        self.proposer = Proposer(None, self.services, self.get_pPaxos_path(prev_master))
        self._join()

        return True

    def get_pPaxos_path(self, path):
        return self.get_remote_path("pPaxos/" + path)


    def cmd_gc(self):
        if not self.check_sanity():
            dbg.err("this is not a metasync repo")
            return False

        def _find_all_blobs(blob, tracked):
            # we may need to move this to blobstore
            if(blob.hv in tracked): return
            tracked.add(blob.hv)
            if(blob.thv == "C"): return
            for name, childblob in blob.entries.iteritems():
                _find_all_blobs(childblob, tracked)

        # check head
        head = self.get_head_value()
        tracked = set([])
        if(head is not None and len(head)>0):
            blob = self.blobstore.get_blob(head, "D") 
            _find_all_blobs(blob, tracked)
        # check master
        with open(self.path_master) as f:
            master_head = f.read().strip()
        if(len(master_head) > 0):
            blob = self.blobstore.get_blob(master_head, "D") 
            _find_all_blobs(blob, tracked)

        allblobs = set(self.blobstore.list())

        # remove following 
        blobs_to_remove = allblobs - tracked
        
        def __rm(srv, remote_path):
            dbg.job("submitted to: %s (%s)" % (srv, remote_path))
            srv.rm(remote_path)

        for hashname in blobs_to_remove:
            for i in self.mapping.get_mapping(hashname):
                self.scheduler.submit(self.srvmap[i], True, __rm, self.get_remote_obj_path(hashname))
            os.unlink(self.get_local_obj_path(hashname)) 

        return True 

    def cmd_rm(self, pn):
        if not self.check_sanity():
            dbg.err("this is not a metasync repo")
            return False
        #TODO: check if the file exists

        beg = time.time()
        try:
            dirname = os.path.dirname(pn)
            dirblob = self.blobstore.load_dir(dirname, False)
            if(dirblob is None):
                dbg.err("%s does not exist" % pn)
                return False
        except NotTrackedException as e:
            dbg.err(str(e))
            return False

        fname = os.path.basename(pn)
        if(not fname in dirblob): 
            dbg.err("%s does not exist" % pn)
            return False

        dirblob.rm(fname)
        root = self.get_root_blob()
        root.store()
        newblobs = self.blobstore.get_added_blobs()

        # we may need to include pointer for previous version.
        util.write_file(self.get_head(), root.hv)
        self.append_history(root.hv)

        end = time.time()
        dbg.time("local write: %f" % (end-beg))

        # push new blobs remotely
        self.bstore_sync(newblobs)
        self._put_all(self.get_head(), self.get_remote_path(self.get_head_name()))

        end = time.time()
        dbg.time("remote write: %f" % (end-beg))
        self._join()

        # drop local copy
        # TODO: rm only tracked files if removing file.
        try:
            os.unlink(pn)
        except:
            dbg.err("failed to rm %s" % pn)
            return False

        return True

    def append_history(self, hv):
        util.append_file(self.path_head_history, hv+"\n")

    def cmd_checkin(self, paths, unit=BLOB_UNIT, upload_only_first=False):
        if not self.check_sanity():
            dbg.err("this is not a metasync repo")
            return False
        if type(paths) != types.ListType:
            paths = [paths] 
        for pn in paths: 
            if not os.path.exists(pn):
                dbg.err("File %s doesn't exits." % pn)
                return False
            
        beg = time.time()
        #XXX: considering mtime, check hash of chunks?
        changed = False
        for path in paths:
            if(not os.path.isfile(path)): 
                changed = True
                for root, dirs, files in os.walk(path):
                    fsizesum = 0
                    for fname in files:
                        fsizesum += os.stat(os.path.join(root,fname)).st_size
                    print(root + " " + str(fsizesum))
                    if(fsizesum < unit):
                        dirblob = self.blobstore.load_dir(root, dirty=True, merge=True)
                        for fname in files:
                            dirblob.add_file(fname, os.path.join(root, fname))
                        dirblob.done_adding()
                    else:
                        dirblob = self.blobstore.load_dir(root, dirty=True)
                        for fname in files:
                            fileblob = self.blobstore.load_file(os.path.join(root, fname), unit)
                            if(fname in dirblob and dirblob[fname].hv == fileblob.hv):
                                continue
                            dirblob.add(fname, fileblob)
            else:
                fileblob = self.blobstore.load_file(path, unit)
                dirname = os.path.dirname(path)
                if(dirname == ""): dirname = "."
                dirblob = self.blobstore.load_dir(dirname, dirty=True)
                fname = os.path.basename(path)
                if(fname in dirblob and dirblob[fname].hv == fileblob.hv):
                    continue
                changed = True
                dirblob.add(fname, fileblob)
        if(not changed): return True
        root = self.get_root_blob()
        root.store()
        newblobs = self.blobstore.get_added_blobs()

        util.write_file(self.get_head(), "%s.%s.%d" % (root.hv, self.get_config_hash(), self.get_next_version()))

        end = time.time()
        dbg.time("local write: %f" % (end-beg))

        # push new blobs remotely
        leftover = self.bstore_sync(newblobs)
        self._update_all(self.get_head(), self.get_remote_path(self.get_head_name()))

        self._join()
        end = time.time()
        dbg.time("remote write for R1: %f" % (end-beg))
        if(not upload_only_first):
            self.bstore_sync_left(leftover)
            end = time.time()
            dbg.time("remote write for left: %f" % (end-beg))
            return []
        else:
            return leftover


    def cmd_push(self):
        prev = self.get_prev_value()
        newvalue = self.get_head_and_config()
        val = self.propose_value(prev, newvalue)
        if(val != newvalue):
            dbg.err("You should fetch first")
            return False

        # with open(self.path_master) as f:
        #     master_head = f.read().strip()
        # with open(self.get_head()) as f:
        #     head = f.read().strip()
        # if(len(master_head) > 0):
        #     head_history = self.get_history()
        #     if(not master_head in head_history):
        #         dbg.err("You should update first")
        #         self.unlock_master()
        #         return False
        # check master is ancestor of the head

        shutil.copyfile(self.get_head(), self.get_prev())
        self._update_all(self.get_prev(), self.get_remote_path(self.get_prev_name()))
        from paxos import Proposer
        self.proposer = Proposer(None, self.services, self.get_pPaxos_path(newvalue))
        self._join()
        return True

    def cmd_status(self, unit=BLOB_UNIT):

        def simple_walk(folder):
            # simple_walk skips descending into folders that are not
            # tracked in the repo
            untracked = []
            changed = []

            for f in os.listdir(folder):
                if f == META_DIR:
                    continue
                basename = os.path.basename(folder)
                if basename == '.' or basename == '':
                    relpath = f
                else:
                    relpath = os.path.join(folder, f)
                if relpath in tracked:
                    if os.path.isdir(relpath):
                        _untracked, _changed = simple_walk(relpath)
                        untracked.extend(_untracked)
                        changed.extend(_changed)
                    else:
                        fblob = tracked[relpath]
                        # compare the file modified time and its metadata blob modified time
                        curr_mtime = os.path.getmtime(relpath)
                        last_mtime = os.path.getmtime(os.path.join(self.path_objs, fblob.hv))
                        if curr_mtime > last_mtime:
                            # only load file when the file modified time is greater than metadata modified time
                            fblob._load()
                            flag = False
                            # compare chunk hash
                            for (offset, chunk) in util.each_chunk2(relpath, unit):
                                if util.sha1(chunk) != fblob.entries[offset].hv:
                                    flag = True
                                    break
                            if flag:
                                changed.append(relpath)
                else:
                    if os.path.isdir(relpath):
                        relpath = os.path.join(relpath, '')
                    untracked.append(relpath)
            return untracked, changed

        if not self.check_sanity():
            dbg.err("this is not a metasync repo")
            return False

        # switch to metasync repo root folder
        os.chdir(self.path_root)

        # compare the head and master history
        head_history = self.get_history()
        master_history = self.get_history(True)
        head_diverge = 0
        for head in head_history:
            if (head in master_history):
                break
            head_diverge += 1
        if head_diverge == len(head_history):
            master_diverge = len(master_history)
        else:
            master_diverge = master_history.index(head_history[head_diverge])

        if head_diverge == 0 and master_diverge == 0:
            print "\nYour branch is up-to-date with master."
        elif head_diverge == 0:
            print "\nYour branch is behind master by %d commit(s)." % master_diverge
        elif master_diverge == 0:
            print "\nYour branch is ahead of master by %d commit(s)." % head_diverge
        else:
            print "\nYour branch and master have diverged,"
            print "and have %d and %d different commits each, respectively" % (head_diverge, master_diverge)

        root = self.get_root_blob()
        tracked = {}
        for (path, blob) in root.walk():
            tracked[path] = blob

        untracked, changed = simple_walk('.')
        if changed:
            print("\nChanges not checked in:")
            for f in changed:
                print("\033[31m\tmodified:   %s\033[m" % f)

        if untracked:
            print("\nUntracked files:")
            for f in untracked:
                print("\033[31m\t%s\033[m" % f)

        return True
Example #12
    def cmd_init(self, namespace, backend=None, nreplicas=None, encrypt_key=None):
        # already initialized?
        if self.check_sanity():
            dbg.err("already initialized %s (%s)" \
                     % (self.path_root, self.namespace))
            return False

        os.mkdir(self.path_meta)
        os.mkdir(self.path_objs)

        # build config opts
        conf = util.new_config()

        # core: unique/permanent info about local machine (often called client)
        #   NOTE. not sure if encryption_key should be in core, or unchangeable
        conf.add_section('core')
        conf.set('core', 'namespace' , namespace)
        conf.set('core', 'clientid'  , util.gen_uuid())
        conf.set('core', 'encryptkey', _get_conf_encryptkey(encrypt_key))

        # backend: info about sync service providers
        # XXX: Error handling
        conf.add_section('backend')
        try:
            services = _get_conf_services(backend)
            conf.set('backend', 'services' , services)
            conf.set('backend', 'nreplicas', _get_conf_nreplicas(nreplicas, len(services.split(","))))
        except:
            pass

        # flush
        with open(self.path_conf, "w") as fd:
            conf.write(fd)

        try: 
            self._load()
        except NameError:
            shutil.rmtree(self.path_meta)
            return False

        # put config into remote
        conf.remove_option('core','clientid')
        conf.remove_option('core','encryptkey')

        with io.BytesIO() as out:
            conf.write(out)
            val = out.getvalue()
            configname = util.sha1(val) 
            self._put_all_content(val, self.get_remote_path("configs/%s" % configname[:6]), True)

            #temporary --- move this to pPaxos
            #self._put_all_content(configname[:6], self.get_remote_path("config"), True)

        # Format for master: headhash.config[:6].version
        prev_master = "." + configname[:6] + ".0"
        # do we need both? or shall we put them into a file together.
        with open(self.get_head(), "w") as f:
            f.write(prev_master)
        with open(self.get_prev(), "w") as f:
            f.write(prev_master)
        self._put_all_dir(self.get_remote_path("objects"))
        # change to put_content
        self._put_all(self.get_head() , self.get_remote_path(self.get_head_name()))
        self._put_all(self.get_prev() , self.get_remote_path(self.get_prev_name()))

        from paxos import Proposer
        self.proposer = Proposer(None, self.services, self.get_pPaxos_path(prev_master))
        self._join()

        return True
Example #13
class MetaSync:
    def __init__(self, root, opts=None):
        #
        #  repo/.metasync/
        #  ^    ^
        #  |    +-- meta
        #  +-- root

        # useful path info
        self.path_root = self._find_root(root)
        self.path_meta = os.path.join(self.path_root, META_DIR)
        self.path_conf = self.get_path("config")
        self.path_objs = self.get_path("objects")
        self.path_master = self.get_path("master")
        self.path_head_history = self.get_path("head_history")
        self.options = opts

        # local blob store
        self.blobstore = BlobStore2(self)  #BlobStore(self.path_objs)

        # load on demand
        self.config = None
        self.srvmap = {}
        self.scheduler = None
        self.translators = []
        self.mapping = None

        # post init
        self._load()

    def _find_root(self, curpath):
        # find repo
        curpath = os.path.abspath(curpath)
        orgpath = curpath
        auth_dir = os.path.join(os.path.expanduser("~"), ".metasync")
        while True:
            path = os.path.join(curpath, META_DIR)
            if (path != auth_dir
                    and os.path.exists(os.path.join(curpath, META_DIR))):
                return curpath
            sp = os.path.split(curpath)
            if (sp[1] == ""): break
            curpath = sp[0]
        return orgpath

    @property
    def services(self):
        return self.srvmap.values()

    # load member variables from config
    def _load(self):
        if not self.check_sanity():
            return

        if (not os.path.exists(AUTH_DIR)): os.mkdir(AUTH_DIR)

        # load config
        self.config = util.load_config(self.path_conf)
        self.namespace = self.config.get("core", "namespace")
        self.clientid = self.config.get("core", "clientid")

        # load services from config
        self.srvmap = {}
        for tok in self.config.get("backend", "services").split(","):
            srv = services.factory(tok)
            self.srvmap[srv.sid()] = srv

        self.nreplicas = int(self.config.get("backend", "nreplicas"))

        nthreads = self.options.nthreads if self.options is not None else 2
        self.scheduler = Scheduler(self.services,
                                   (nthreads + 1) * len(self.srvmap))

        # load translator pipe
        if self.is_encrypted():
            self.translators.append(translators.TrEncrypt(self))

        # TODO. for integrity option
        # if self.is_signed():
        #     self.translators.append(TrSigned(self))

        beg = time.time()
        if (os.path.exists(self.get_path("mapping.pcl"))):
            with open(self.get_path("mapping.pcl")) as f:
                self.mapping = pickle.load(f)
        else:
            mapconfig = []
            for srv in self.services:
                mapconfig.append((srv.sid(), srv.info_storage() / GB))
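            # size the hash space to cover the total storage (in GB) across
            # all services, with a floor of 1024 slots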
            hspacesum = sum(map(lambda x: x[1], mapconfig))
            hspace = max(hspacesum + 1, 1024)
            self.mapping = DetMap2(mapconfig,
                                   hspace=hspace,
                                   replica=self.nreplicas)
            self.mapping.pack()
            with open(self.get_path("mapping.pcl"), "w") as f:
                pickle.dump(self.mapping, f)
        end = time.time()
        dbg.time("mapping init %s" % (end - beg))
        dbg.dbg("head: %s", self.get_head_name())

    def cmd_reconfigure(self, backends, replica):
        srvmap = {}
        for tok in backends.split(","):
            srv = services.factory(tok)
            srvmap[srv.sid()] = srv
        lst_services = srvmap.values()
        mapconfig = []
        lock_dic = {}
        for srv in lst_services:
            mapconfig.append((srv.sid(), srv.info_storage() / GB))
        for srv in srvmap:
            lock_dic[srv] = threading.Lock()
            if srv not in self.srvmap:
                srvmap[srv].putdir(self.get_remote_path("objects"))
        for srv in self.srvmap:
            if srv not in lock_dic:
                lock_dic[srv] = threading.Lock()

        beg = time.time()
        self.mapping.reconfig(mapconfig, int(replica))
        end = time.time()
        dbg.info("remap: %.3fs" % (end - beg))
        beg = time.time()
        lst_objs = self.blobstore.list()
        added, removed = self.mapping.get_remapping(lst_objs)
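        # added: blobs to copy to services now responsible for them;
        # removed: blobs to delete from services no longer responsible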
        nthreads = self.options.nthreads if self.options is not None else 2

        #REFACTOR
        def __put_next(srv, lst, lock):
            dbg.job("submitted to: %s" % srv)
            while True:
                lock.acquire()
                if (len(lst) == 0):
                    lock.release()
                    break
                next = lst.pop()
                lock.release()
                if next is not None:
                    with open(self.get_local_obj_path(next), "rb") as f:
                        blob = f.read()
                        for tr in self.translators:
                            blob = tr.put(blob)
                        # XXX HACK
                        backoff = 0.5
                        remote_path = self.get_remote_obj_path(next)
                        while not srv.put(remote_path, blob):
                            time.sleep(backoff)
                            backoff *= 2

        def __rm_next(srv, lst, lock):
            dbg.job("submitted to: %s" % srv)
            while True:
                lock.acquire()
                if (len(lst) == 0):
                    lock.release()
                    break
                next = lst.pop()
                lock.release()
                if next is not None:
                    remote_path = self.get_remote_obj_path(next)
                    srv.rm(remote_path)

        cnt_added = 0
        for srv in added:
            if (len(added[srv]) == 0): continue
            cnt_added += len(added[srv])
            for i in range(nthreads):
                self.scheduler.submit(srvmap[srv], False, __put_next,
                                      added[srv], lock_dic[srv])
        self._join()

        end = time.time()
        dbg.info("remap put: %.3fs" % (end - beg))

        beg = time.time()
        cnt_removed = 0
        for srv in removed:
            if (len(removed[srv]) == 0): continue
            cnt_removed += len(removed[srv])
            for i in range(nthreads):
                self.scheduler.submit(self.srvmap[srv], False, __rm_next,
                                      removed[srv], lock_dic[srv])
        self._join()
        end = time.time()
        dbg.info("remap rm: %.3fs" % (end - beg))
        dbg.info("added %d, removed %d" % (cnt_added, cnt_removed))

    # config-related parser
    def is_encrypted(self):
        key = self.config.get('core', 'encryptkey').strip()
        return key != ""

    # handling dir/path names
    def get_path(self, path):
        return os.path.join(self.path_meta, path)

    def get_head(self):
        return self.get_path(self.get_head_name())

    def get_head_name(self):
        return "head_%s" % self.get_client_id()

    def get_head_value(self):
        with open(self.get_head()) as f:
            return f.read().strip().split(".")[0]
        return None

    def get_head_and_config(self):
        with open(self.get_head()) as f:
            return f.read().strip()
        return None

    def get_prev(self):
        return self.get_path(self.get_prev_name())

    def get_prev_name(self):
        return "prev_%s" % self.get_client_id()

    def get_prev_value(self):
        with open(self.get_prev()) as f:
            return f.read().strip()
        return None

    def get_next_version(self):
        with open(self.get_prev()) as f:
            return int(f.read().strip().split(".")[2]) + 1
        return None

    #XXX: Cache?
    def get_config_hash(self):
        with open(self.get_head()) as f:
            return f.read().strip().split(".")[1]
        return None

    def get_client_id(self):
        return self.clientid

    def get_relative_path(self, path):
        return os.path.relpath(os.path.abspath(path), self.path_root)

    def get_local_path(self, *path):
        return os.path.join(self.path_root, *[p.strip("/") for p in path])

    def get_local_obj_path(self, hv):
        return os.path.join(self.path_objs, hv)

    def get_remote_path(self, *path):
        #return os.path.join(self.namespace, *path).rstrip("/")
        return "/".join([self.namespace] + list(path)).rstrip("/").rstrip("\\")

    def get_remote_obj_path(self, *hashes):
        return self.get_remote_path("objects", *hashes)

    def get_root_blob(self):
        return self.blobstore.get_root_blob()

    # check basic sanity of repo's meta info
    def check_sanity(self, whynot=False):
        def __err(why):
            if whynot:
                print >> sys.stderr, why
            return False

        if not os.path.exists(self.path_meta):
            return __err("Can't find the root of repo (%s)" % self.path_meta)
        if not os.path.exists(self.path_conf):
            return __err("Can't find config (%s)" % self.path_conf)
        if not os.path.exists(self.path_objs):
            return __err("Can't find objects store (%s)" % self.path_objs)
        return True

    # schedule-related
    def _put_all_content(self, content, remote_path, serial=False):
        def __put(srv):
            #dbg.job("submitted to: %s" % srv)
            srv.put(remote_path, content)

        # submit jobs
        for srv in self.services:
            self.scheduler.submit(srv, serial, __put)

    def _put_all_dir(self, remote_path):
        # XXX. handle errs
        def __putdir(srv):
            srv.putdir(remote_path)

        # submit jobs
        for srv in self.services:
            self.scheduler.submit(srv, True, __putdir)

    def _put_all(self, path, remote_path):
        # XXX. handle errs
        def __put(srv):
            with open(path, "rb") as f:
                srv.put(remote_path, f.read())

        # submit jobs
        for srv in self.services:
            self.scheduler.submit(srv, True, __put)

    def _update_all(self, path, remote_path):
        # XXX. handle errs
        def __update(srv):
            #dbg.job("submitted to: %s" % srv)
            with open(path, "rb") as f:
                #print 'start to put'
                srv.update(remote_path, f.read())
                #print 'put ends'

        # submit jobs
        for srv in self.services:
            self.scheduler.submit(srv, True, __update)

    def _join(self):
        self.scheduler.join()

    def _get(self, srv, path, remote_path):
        def __get(srv, path, remote_path):
            dbg.job("submitted to: %s (%s)" % (srv, path))
            with open(path, "wb") as f:
                blob = srv.get(remote_path)
                if (blob is None):
                    time.sleep(1)
                    blob = srv.get(remote_path)
                for tr in reversed(self.translators):
                    blob = tr.get(blob)
                f.write(blob)

        self.scheduler.submit(srv, False, __get, path, remote_path)

    # bstore-related
    def bstore_download(self):
        # TODO, handle when R > 1
        lst = self.blobstore.list()
        #dbg.dbg("lst files:%s" % lst)

        lock = threading.Lock()

        def __get_next(srv, hash_dic, lock, allset, srvname):
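            # Worker: download blobs listed on this service; allset de-dups
            # work across services, and failed downloads retry with
            # exponential backoff.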
            if (len(hash_dic[srvname]) == 0): return
            while True:
                lock.acquire()
                try:
                    next = hash_dic[srvname].pop()
                    l = len(hash_dic[srvname])
                    if (l % 10 == 0):
                        dbg.dbg("%s left %d" % (srvname, l))
                    if (next not in allset):
                        allset.add(next)
                    else:
                        next = None
                except IndexError:
                    lock.release()
                    break
                lock.release()
                if (next is not None):
                    remote_path = self.get_remote_obj_path(next)
                    path = os.path.join(self.path_objs, next)
                    with open(path, "wb") as f:
                        backoff = 0.5
                        while True:
                            blob = srv.get(remote_path)
                            if (blob is not None): break
                            dbg.dbg("back off %s" % srvname)
                            time.sleep(backoff)
                            backoff *= 2

                        for tr in reversed(self.translators):
                            blob = tr.get(blob)
                        f.write(blob)

        hash_dic = {}
        allset = set([])
        for srv in self.services:
            hash_dic[str(srv)] = []
            srvlist = srv.listdir(self.get_remote_obj_path())
            backoff = 1
            while srvlist is None:
                dbg.dbg("back off - listdir %s" % str(srv))
                time.sleep(backoff)
                backoff *= 2
                srvlist = srv.listdir(self.get_remote_obj_path())

            for hashname in srvlist:
                if (hashname in lst):
                    #dbg.dbg("%s is already in bstore" % hashname)
                    continue
                hash_dic[str(srv)].append(hashname)

        nthreads = self.options.nthreads if self.options is not None else 2
        for srv in self.services:
            dbg.dbg("%s:%d dn" % (str(srv), len(hash_dic[str(srv)])))
            ##HACK
            for i in range(nthreads):
                self.scheduler.submit(srv, False, __get_next, hash_dic, lock,
                                      allset, str(srv))

    def bstore_sync_left(self, hashdic):
        cnt = 0
        for i in hashdic:
            cnt += len(hashdic[i])
        if (cnt == 0): return

        def __put_next(srv, lst, lock):
            dbg.job("submitted to: %s" % srv)
            while True:
                lock.acquire()
                if (len(lst) == 0):
                    lock.release()
                    break
                next = lst.pop()
                lock.release()
                if next is not None:
                    with open(self.get_local_obj_path(next), "rb") as f:
                        blob = f.read()
                        for tr in self.translators:
                            blob = tr.put(blob)
                        # XXX HACK
                        backoff = 0.5
                        remote_path = self.get_remote_obj_path(next)
                        while not srv.put(remote_path, blob):
                            time.sleep(backoff)
                            backoff *= 2

        lock_dic = {}
        for i in hashdic:
            lock_dic[i] = threading.Lock()

        nthreads = self.options.nthreads if self.options is not None else 2
        for srv in hashdic:
            for i in range(nthreads):
                self.scheduler.submit(self.srvmap[srv], False, __put_next,
                                      hashdic[srv], lock_dic[srv])

        self._join()

    #XXX: ideally this should return once the first replica of each blob is
    #     uploaded, and continue the remaining replication in the background.
    def bstore_sync(self, hashnames):
        dbg.dbg("need to sync: %s..@%d" % (hashnames[0], len(hashnames)))

        def __put_next(srv, hashdic, hashdic_left, allset, key, lock):
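            # Worker: pop blobs assigned to this service; the first service to
            # claim a blob (still in allset) uploads it now, otherwise the blob
            # is deferred to hashdic_left for later replication.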
            dbg.job("submitted to: %s" % srv)
            while True:
                lock.acquire()
                if (len(hashdic[key]) == 0 or len(allset) == 0):
                    lock.release()
                    break

                next = hashdic[key].pop()
                if (next in allset):
                    allset.remove(next)
                else:
                    hashdic_left[key].append(next)
                    next = None
                lock.release()
                if next is not None:
                    with open(self.get_local_obj_path(next), "rb") as f:
                        blob = f.read()
                        for tr in self.translators:
                            blob = tr.put(blob)
                        # XXX HACK
                        backoff = 0.5
                        remote_path = self.get_remote_obj_path(next)
                        while not srv.put(remote_path, blob):
                            dbg.dbg("backoff %s" % srv)
                            time.sleep(backoff)
                            backoff *= 2

        nthreads = self.options.nthreads if self.options is not None else 2
        hashdic = {}
        hashdic_left = {}
        allset = set()
        lock = threading.Lock()
        for srv in self.srvmap:
            hashdic[srv] = []
            hashdic_left[srv] = []

        for hashname in hashnames:
            allset.add(hashname)
            for i in self.mapping.get_mapping(hashname):
                hashdic[i].append(hashname)

        for srv in hashdic:
            for i in range(nthreads):
                self.scheduler.submit(self.srvmap[srv], False, __put_next,
                                      hashdic, hashdic_left, allset, srv, lock)

        self._join()
        return hashdic_left

    # iterate bstore
    def bstore_iter(self):
        for root, dirs, files in os.walk(self.path_objs):
            for name in files:
                yield name

    def bstore_iter_remote(self, srv):
        assert srv in self.services

        # NOTE. at some point, we need cascaded directory hierarchy
        for obj in srv.listdir(self.get_remote_obj_path()):
            yield obj

    #XXX. update only changed files (SY)
    def restore_from_master(self):
        root = self.get_root_blob()
        dbg.dbg("restore")
        for name, blob in root.walk():
            pn = os.path.join(self.path_root, name)
            if blob.thv == "F":
                content = blob.read()
                util.write_file(pn, content.getvalue())
                content.close()
            if blob.thv == "m":
                content = blob.read()
                util.write_file(pn, content)
            elif blob.thv == "D" or blob.thv == "M":
                try:
                    os.mkdir(pn)
                except:
                    pass
        return True

    def propose_value(self, prev, newvalue):
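        # Run one Paxos round keyed by the current prev pointer; the value
        # that wins the round is returned and becomes the new master.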
        from paxos import Proposer
        self.proposer = Proposer(self.clientid, self.services,
                                 self.get_remote_path("pPaxos/" + prev))
        return self.proposer.propose(newvalue)

    # need to truncate if history is too long.
    def get_history(self, is_master=False):
        pn = self.path_master_history if is_master else self.path_head_history
        content = util.read_file(pn).strip()
        if content:
            history = content.split("\n")
            history.reverse()
        else:
            history = []

        return history

    def get_common_ancestor(self,
                            head_history,
                            master_history,
                            known_common_history=None):
        # change to use known_common_history
        for head in head_history:
            if (head in master_history):
                return head
        return None

    def try_merge(self, head_history, master_history):
        # this needs to be fixed.
        dbg.dbg("Trying to merge")
        # we may need to cache the last branched point
        common = self.get_common_ancestor(head_history, master_history)
        dbg.dbg("%s %s %s", head_history[0], master_history[0], common)
        common = self.blobstore.get_blob(common, "D")
        head = self.get_root_blob()
        master = self.blobstore.get_blob(master_history[0], "D")

        added1 = head.diff(common)
        added2 = master.diff(common)

        def intersect(a, b):
            return list(set(a) & set(b))

        if (len(intersect(added1.keys(), added2.keys())) != 0):
            dbg.err("both modified--we need to handle it")
            return False
        for i in added2.keys():
            path = os.path.join(self.path_root, i)
            dirblob = self.blobstore.load_dir(os.path.dirname(path),
                                              dirty=True)
            dirblob.add(os.path.basename(path), added2[i], dirty=False)

        # HACK, need to go through all the non-overlapped history.
        self.append_history(master.hv)
        head.store()
        self.append_history(head.hv)
        # HACK, need to be changed
        newblobs = self.blobstore.get_added_blobs()

        # push new blobs remotely
        self.bstore_sync(newblobs)
        self._join()

        return True

    def get_uptodate_master(self, includeself=True, srv=None):
        # collect every client's prev pointer so the most up-to-date master
        # can be chosen
        if srv is None:
            srv = self.services[0]
        prev_clients = filter(lambda x: x.startswith("prev_"),
                              srv.listdir(self.get_remote_path("")))
        pointers = set()
        for prev in prev_clients:
            if not includeself or not prev.endswith(self.clientid):
                with open(self.get_path(prev), "w") as f:
                    pointer = srv.get(self.get_remote_path(prev))
                    pointers.add(pointer)
                    f.write(pointer)
        if includeself:
            pointers.add(self.get_prev_value())

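        # the third dotted field of each pointer is its version number;
        # the pointer with the highest version is the most up-to-date master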
        return max(pointers, key=lambda x: int(x.split(".")[2]))

    def check_master_uptodate(self):
        srv = self.services[0]
        remote_master = srv.get(self.get_remote_path("master"))
        with open(self.path_master) as f:
            master_head = f.read().strip()
        if (master_head != remote_master): return False
        return True

    def cmd_poll(self):
        srv = self.services[0]
        srv.poll(self.namespace)

    #
    # end-user's interfaces (starting with cmd_ prefix)
    #   NOTE. explicitly return True/False to indicate status of 'cmd'
    #

    def cmd_share(self, target_email):
        if not self.check_sanity():
            dbg.err("this is not metasync repo")
            return False

        for srv in self.services:
            srv.share(self.namespace, target_email)

    def cmd_diff(self):
        # work only for 1-level directory
        # need to add diff for file
        if not self.check_sanity():
            dbg.err("this is not metasync repo")
            return False
        root = self.get_root_blob()
        added = []
        removed = []
        files = os.listdir(".")
        for f in files:
            if (f == ".metasync"): continue
            if ("/" + f not in root.files):
                added.append(f)

        for f in root.files:
            if (f[1:] not in files):
                removed.append(f[1:])

        for f in added:
            print("+++ %s" % f)

        for f in removed:
            print("--- %s" % f)

    def cmd_mv(self, src_pn, dst_pn):
        if not self.check_sanity():
            dbg.err("it's not a metasync repo.")
            return False
        src_pn = os.path.abspath(src_pn)
        dst_pn = os.path.abspath(dst_pn)

        #TODO: check src_pn exists
        beg = time.time()
        try:
            dirname = os.path.dirname(src_pn)
            dirblob = self.blobstore.load_dir(dirname, False, dirty=True)
            if (dirblob is None):
                dbg.err("%s does not exist" % src_pn)
                return False
        except NotTrackedException as e:
            dbg.err(str(e))
            return False

        fname = os.path.basename(src_pn)
        if (not fname in dirblob):
            dbg.err("%s does not exist" % pn)
            return False
        fblob = dirblob[fname]
        dirblob.rm(fname)

        dst_dirname = os.path.dirname(dst_pn)
        if (dirname != dst_dirname):
            dirblob = self.blobstore.load_dir(dst_dirname, True, dirty=True)
            assert dirblob is not None

        dst_fname = os.path.basename(dst_pn)
        dirblob.add(dst_fname, fblob, dirty=False)

        root = self.get_root_blob()
        root.store()
        newblobs = self.blobstore.get_added_blobs()

        util.write_file(self.get_head(), root.hv)
        self.append_history(root.hv)

        end = time.time()
        dbg.time("local write: %f" % (end - beg))

        # push new blobs remotely
        self.bstore_sync(newblobs)
        self._put_all(self.get_head(),
                      self.get_remote_path(self.get_head_name()))

        end = time.time()
        dbg.time("remote write: %f" % (end - beg))

        # move the file
        shutil.move(src_pn, dst_pn)
        self._join()

        return True

    def cmd_peek(self):
        root = self.get_root_blob()
        for i in root.walk():
            print(i)
        # print("hash: %s" % root.hash_head)
        # print(root.dump_info())
        # with open(self.path_master) as f:
        #     master_head = f.read().strip()
        # with open(self.get_head()) as f:
        #     head = f.read().strip()
        # print("head_history %s" % ",".join(self.get_history(head)))
        # print("master_history %s" %  ",".join(self.get_history(master_head)))

    def cmd_fetch(self):
        if not self.check_sanity():
            dbg.err("it's not a metasync repo.")
            return False

        # TODO: change it into comparing between masters
        self.bstore_download()

        self._join()
        return True

    def update_changed(self, head, master):
        def _file_create(blob, pn):
            if (blob.thv == "D" or blob.thv == "M"):
                util.mkdirs(pn)
                for i in blob.entries:
                    _file_create(blob[i], os.path.join(pn, i))
            elif (blob.thv == "F"):
                content = blob.read()
                util.write_file(pn, content.getvalue())
                content.close()
                # touch metadata blob (for cmd_status)
                os.utime(os.path.join(self.path_objs, blob.hv), None)
            elif (blob.thv == "m"):
                content = blob.read()
                util.write_file(pn, content)
                # touch metadata blob (for cmd_status)
                os.utime(os.path.join(self.path_objs, blob.hv), None)
            else:
                assert False

        def _update(old_dirblob, new_dirblob, path):
            for fname in new_dirblob.entries:
                blob = new_dirblob[fname]
                if (fname not in old_dirblob):
                    _file_create(blob, os.path.join(path, fname))
                elif (blob.hv != old_dirblob[fname].hv):
                    if (blob.thv == "D"):
                        _update(old_dirblob[fname], blob,
                                os.path.join(path, fname))
                    elif (blob.thv == "F"):
                        _file_create(blob, os.path.join(path, fname))
                    else:
                        print(blob.thv)
                        assert False

        # print('head: ', head)
        # print('master: ', master)
        headblob = self.blobstore.get_blob(head, "D")
        masterblob = self.blobstore.get_blob(master, "D")
        _update(headblob, masterblob, self.path_root)

    def update_head_and_prev(self, master):
        with open(self.get_prev(), "w") as f:
            f.write(master)
        with open(self.get_head(), "w") as f:
            f.write(master)

    def cmd_update(self):
        master = self.get_uptodate_master(False)
        # already up-to-date
        prev = self.get_prev_value()
        if (master == prev):
            self.update_head_and_prev(master)
            return True

        head = self.get_head_and_config()
        # XXX: need to check for files that were modified but not checked in.
        if (head == prev):
            headstr = head.split(".")[0]
            if headstr == '':
                self.update_head_and_prev(master)
                return self.restore_from_master()

            masterstr = master.split(".")[0]
            self.update_changed(headstr, masterstr)
        else:
            ### need to merge
            raise Exception('Merge required')

        self.update_head_and_prev(master)
        self.blobstore.rootblob = None
        dbg.info("update done %s" % time.ctime())
        return True

    #XXX: Seungyeop is working on it.
    def cmd_clone(self, namespace, backend=None, encrypt_key=None):
        # if wrong target
        if self.check_sanity():
            return False

        # reset all the path by including the namespace
        self.path_root = os.path.join(self.path_root, namespace)
        self.path_meta = os.path.join(self.path_root, META_DIR)
        self.path_conf = self.get_path("config")
        self.path_objs = self.get_path("objects")
        #self.path_head_history = self.get_path("head_history")

        if os.path.exists(self.path_root):
            dbg.err("%s already exists." % self.path_root)
            return False

        if backend is None:
            print "input one of the storage backends, (e.g., dropbox,google,box)"
            print "  for testing, use disk@/path (e.g., disk@/tmp)"
            backend = raw_input("> ")

        srv = services.factory(backend)
        self.namespace = namespace

        # create repo directory
        os.mkdir(self.path_root)
        os.mkdir(self.path_meta)
        os.mkdir(self.path_objs)

        curmaster = self.get_uptodate_master(False, srv)
        sp = curmaster.split(".")
        master = sp[0]
        seed = sp[1]
        seed = srv.get(self.get_remote_path("configs/%s" % seed))
        conf = util.loads_config(seed)

        # setup client specific info
        conf.set('core', 'clientid', util.gen_uuid())
        conf.set('core', 'encryptkey', _get_conf_encryptkey(encrypt_key))

        with open(self.path_conf, "w") as fd:
            conf.write(fd)

        self._load()
        beg = time.time()
        self.bstore_download()
        self._join()

        with open(self.get_head(), "w") as f:
            f.write(curmaster)
        with open(self.get_prev(), "w") as f:
            f.write(curmaster)

        # send my head to remote
        self._put_all(self.get_head(),
                      self.get_remote_path(self.get_head_name()))
        self._put_all(self.get_prev(),
                      self.get_remote_path(self.get_prev_name()))
        self._join()

        if (master):
            ret = self.restore_from_master()
        end = time.time()
        dbg.dbg("clone: %ss" % (end - beg))
        return True

    def cmd_init(self,
                 namespace,
                 backend=None,
                 nreplicas=None,
                 encrypt_key=None):
        # already initialized?
        if self.check_sanity():
            dbg.err("already initialized %s (%s)" \
                     % (self.path_root, self.namespace))
            return False

        os.mkdir(self.path_meta)
        os.mkdir(self.path_objs)

        # build config opts
        conf = util.new_config()

        # core: unique/permanent info about local machine (often called client)
        #   NOTE. not sure if encryption_key should be in core, or unchangeable
        conf.add_section('core')
        conf.set('core', 'namespace', namespace)
        conf.set('core', 'clientid', util.gen_uuid())
        conf.set('core', 'encryptkey', _get_conf_encryptkey(encrypt_key))

        # backend: info about sync service providers
        # XXX: Error handling
        conf.add_section('backend')
        try:
            services = _get_conf_services(backend)
            conf.set('backend', 'services', services)
            conf.set('backend', 'nreplicas',
                     _get_conf_nreplicas(nreplicas, len(services.split(","))))
        except:
            pass

        # flush
        with open(self.path_conf, "w") as fd:
            conf.write(fd)

        try:
            self._load()
        except NameError:
            shutil.rmtree(self.path_meta)
            return False

        # put config into remote
        conf.remove_option('core', 'clientid')
        conf.remove_option('core', 'encryptkey')

        with io.BytesIO() as out:
            conf.write(out)
            val = out.getvalue()
            configname = util.sha1(val)
            self._put_all_content(
                val, self.get_remote_path("configs/%s" % configname[:6]), True)

            #temporary --- move this to pPaxos
            #self._put_all_content(configname[:6], self.get_remote_path("config"), True)

        # Format for master: headhash.config[:6].version
        prev_master = "." + configname[:6] + ".0"
        # do we need both? or shall we put them into a file together.
        with open(self.get_head(), "w") as f:
            f.write(prev_master)
        with open(self.get_prev(), "w") as f:
            f.write(prev_master)
        self._put_all_dir(self.get_remote_path("objects"))
        # change to put_content
        self._put_all(self.get_head(),
                      self.get_remote_path(self.get_head_name()))
        self._put_all(self.get_prev(),
                      self.get_remote_path(self.get_prev_name()))

        from paxos import Proposer
        self.proposer = Proposer(None, self.services,
                                 self.get_pPaxos_path(prev_master))
        self._join()

        return True

    def get_pPaxos_path(self, path):
        return self.get_remote_path("pPaxos/" + path)

    def cmd_gc(self):
        if not self.check_sanity():
            dbg.err("this is not a metasync repo")
            return False

        def _find_all_blobs(blob, tracked):
            # we may need to move this to blobstore
            if (blob.hv in tracked): return
            tracked.add(blob.hv)
            if (blob.thv == "C"): return
            for name, childblob in blob.entries.iteritems():
                _find_all_blobs(childblob, tracked)

        # check head
        head = self.get_head_value()
        tracked = set([])
        if (head is not None and len(head) > 0):
            blob = self.blobstore.get_blob(head, "D")
            _find_all_blobs(blob, tracked)
        # check master
        with open(self.path_master) as f:
            master_head = f.read().strip()
        if (len(master_head) > 0):
            blob = self.blobstore.get_blob(master_head, "D")
            _find_all_blobs(blob, tracked)

        allblobs = set(self.blobstore.list())

        # remove following
        blobs_to_remove = allblobs - tracked

        def __rm(srv, remote_path):
            dbg.job("submitted to: %s (%s)" % (srv, remote_path))
            srv.rm(remote_path)

        for hashname in blobs_to_remove:
            for i in self.mapping.get_mapping(hashname):
                self.scheduler.submit(self.srvmap[i], True, __rm,
                                      self.get_remote_obj_path(hashname))
            os.unlink(self.get_local_obj_path(hashname))

        return True

    def cmd_rm(self, pn):
        if not self.check_sanity():
            dbg.err("this is not a metasync repo")
            return False
        #TODO: check if the file exists

        beg = time.time()
        try:
            dirname = os.path.dirname(pn)
            dirblob = self.blobstore.load_dir(dirname, False)
            if (dirblob is None):
                dbg.err("%s does not exist" % pn)
                return False
        except NotTrackedException as e:
            dbg.err(str(e))
            return False

        fname = os.path.basename(pn)
        if (not fname in dirblob):
            dbg.err("%s does not exist" % pn)
            return False

        dirblob.rm(fname)
        root = self.get_root_blob()
        root.store()
        newblobs = self.blobstore.get_added_blobs()

        # we may need to include pointer for previous version.
        util.write_file(self.get_head(), root.hv)
        self.append_history(root.hv)

        end = time.time()
        dbg.time("local write: %f" % (end - beg))

        # push new blobs remotely
        self.bstore_sync(newblobs)
        self._put_all(self.get_head(),
                      self.get_remote_path(self.get_head_name()))

        end = time.time()
        dbg.time("remote write: %f" % (end - beg))
        self._join()

        # drop local copy
        # TODO: rm only tracked files if removing file.
        try:
            os.unlink(pn)
        except:
            dbg.err("failed to rm %s" % pn)
            return False

        return True

    def append_history(self, hv):
        util.append_file(self.path_head_history, hv + "\n")

    def cmd_checkin(self, paths, unit=BLOB_UNIT, upload_only_first=False):
        if not self.check_sanity():
            dbg.err("this is not a metasync repo")
            return False
        if type(paths) != types.ListType:
            paths = [paths]
        for pn in paths:
            if not os.path.exists(pn):
                dbg.err("File %s doesn't exits." % pn)
                return False

        beg = time.time()
        #XXX: considering mtime, check hash of chunks?
        changed = False
        for path in paths:
            if (not os.path.isfile(path)):
                changed = True
                for root, dirs, files in os.walk(path):
                    fsizesum = 0
                    for fname in files:
                        fsizesum += os.stat(os.path.join(root, fname)).st_size
                    print(root + " " + str(fsizesum))
                    if (fsizesum < unit):
                        dirblob = self.blobstore.load_dir(root,
                                                          dirty=True,
                                                          merge=True)
                        for fname in files:
                            dirblob.add_file(fname, os.path.join(root, fname))
                        dirblob.done_adding()
                    else:
                        dirblob = self.blobstore.load_dir(root, dirty=True)
                        for fname in files:
                            fileblob = self.blobstore.load_file(
                                os.path.join(root, fname), unit)
                            if (fname in dirblob
                                    and dirblob[fname].hv == fileblob.hv):
                                continue
                            dirblob.add(fname, fileblob)
            else:
                fileblob = self.blobstore.load_file(path, unit)
                dirname = os.path.dirname(path)
                if (dirname == ""): dirname = "."
                dirblob = self.blobstore.load_dir(dirname, dirty=True)
                fname = os.path.basename(path)
                if (fname in dirblob and dirblob[fname].hv == fileblob.hv):
                    continue
                changed = True
                dirblob.add(fname, fileblob)
        if (not changed): return True
        root = self.get_root_blob()
        root.store()
        newblobs = self.blobstore.get_added_blobs()

        util.write_file(
            self.get_head(), "%s.%s.%d" %
            (root.hv, self.get_config_hash(), self.get_next_version()))

        end = time.time()
        dbg.time("local write: %f" % (end - beg))

        # push new blobs remotely
        leftover = self.bstore_sync(newblobs)
        self._update_all(self.get_head(),
                         self.get_remote_path(self.get_head_name()))

        self._join()
        end = time.time()
        dbg.time("remote write for R1: %f" % (end - beg))
        if (not upload_only_first):
            self.bstore_sync_left(leftover)
            end = time.time()
            dbg.time("remote write for left: %f" % (end - beg))
            return []
        else:
            return leftover

    def cmd_push(self):
        prev = self.get_prev_value()
        newvalue = self.get_head_and_config()
        val = self.propose_value(prev, newvalue)
        # print("val: ", val, "newval: ", newvalue, "prev: ", prev)
        if (val != newvalue):
            dbg.err("You should fetch first")
            return False

        # with open(self.path_master) as f:
        #     master_head = f.read().strip()
        # with open(self.get_head()) as f:
        #     head = f.read().strip()
        # if(len(master_head) > 0):
        #     head_history = self.get_history()
        #     if(not master_head in head_history):
        #         dbg.err("You should update first")
        #         self.unlock_master()
        #         return False
        # check master is ancestor of the head

        shutil.copyfile(self.get_head(), self.get_prev())
        self._update_all(self.get_prev(),
                         self.get_remote_path(self.get_prev_name()))
        from paxos import Proposer
        self.proposer = Proposer(None, self.services,
                                 self.get_pPaxos_path(newvalue))
        self._join()
        return True

    def cmd_status(self, unit=BLOB_UNIT):
        def simple_walk(folder):
            # simple_walk skips descending into folders
            # that are not tracked in the repo
            untracked = []
            changed = []

            for f in os.listdir(folder):
                if f == META_DIR:
                    continue
                basename = os.path.basename(folder)
                if basename == '.' or basename == '':
                    relpath = f
                else:
                    relpath = os.path.join(folder, f)
                if relpath in tracked:
                    if os.path.isdir(relpath):
                        _untracked, _changed = simple_walk(relpath)
                        untracked.extend(_untracked)
                        changed.extend(_changed)
                    else:
                        fblob = tracked[relpath]
                        # compare the file modified time and its metadata blob modified time
                        curr_mtime = os.path.getmtime(relpath)
                        last_mtime = os.path.getmtime(
                            os.path.join(self.path_objs, fblob.hv))
                        if curr_mtime > last_mtime:
                            # only load file when the file modified time is greater than metadata modified time
                            fblob._load()
                            flag = False
                            # compare chunk hash
                            for (offset,
                                 chunk) in util.each_chunk2(relpath, unit):
                                if util.sha1(
                                        chunk) != fblob.entries[offset].hv:
                                    flag = True
                                    break
                            if flag:
                                changed.append(relpath)
                else:
                    if os.path.isdir(relpath):
                        relpath = os.path.join(relpath, '')
                    untracked.append(relpath)
            return untracked, changed

        if not self.check_sanity():
            dbg.err("this is not a metasync repo")
            return False

        # switch to metasync repo root folder
        os.chdir(self.path_root)

        # compare the head and master history
        head_history = self.get_history()
        master_history = self.get_history(True)
        head_diverge = 0
        for head in head_history:
            if (head in master_history):
                break
            head_diverge += 1
        if head_diverge == len(head_history):
            master_diverge = len(master_history)
        else:
            master_diverge = master_history.index(head_history[head_diverge])

        if head_diverge == 0 and master_diverge == 0:
            print "\nYour branch is up-to-date with master."
        elif head_diverge == 0:
            print "\nYour branch is behind master by %d commit(s)." % master_diverge
        elif master_diverge == 0:
            print "\nYour branch is ahead of master by %d commit(s)." % head_diverge
        else:
            print "\nYour branch and master have diverged,"
            print "and have %d and %d different commits each, respectively" % (
                head_diverge, master_diverge)

        root = self.get_root_blob()
        tracked = {}
        for (path, blob) in root.walk():
            tracked[path] = blob

        untracked, changed = simple_walk('.')
        if changed:
            print("\nChanges not checked in:")
            for f in changed:
                print("\033[31m\tmodified:   %s\033[m" % f)

        if untracked:
            print("\nUntracked files:")
            for f in untracked:
                print("\033[31m\t%s\033[m" % f)

        return True
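Taken together, the cmd_* methods above compose into the usual workflow: initialize a repo, check files in, then push the new head through a Paxos round so it becomes the master. A hypothetical driver, using the disk@/path test backend suggested by cmd_clone's prompt (all paths and names here are illustrative only):

# Hypothetical usage sketch for the MetaSync class above.
ms = MetaSync("/tmp/repo")
ms.cmd_init("myns", backend="disk@/tmp/store", nreplicas="1")
ms.cmd_checkin(["notes.txt"])   # hash into blobs locally, then upload them
ms.cmd_push()                   # propose the new head as master via Paxos
ms.cmd_status()                 # compare the local head against master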
Example #17
class Server:
    def __init__(self, config, globalConfig, client_sock=None):
        self.config = config
        self.globalConfig = globalConfig
        self.init_balance = 100
        self.set = []
        self.blockchain = []
        self.proposer = Proposer(self.config, globalConfig)
        self.acceptor = Acceptor(self.config)
        self.inPaxos = False

    def run(self):
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        sock.bind((self.config["ip-addr"], self.config["port"]))
        sock.listen()
        try:
            with open(self.config["name"] + "_blockchain.txt", "rb") as f:
                self.blockchain = pickle.load(f)
        except FileNotFoundError:
            print("File doesn't exist.")

        self.askResync()
        print("Server is listening...")
        while True:
            conn, addr = sock.accept()
            t1 = threading.Thread(target=Server.handleReq, args=(
                self,
                conn,
            ))
            t1.start()

    def handleReq(self, conn):
        while True:
            msg = conn.recv(1024)
            if not msg:
                # peer closed the connection; stop polling it
                break
            try:
                decodedMsg = pickle.loads(msg)
                t1 = threading.Thread(target=Server.handleClientMsg,
                                      args=(
                                          self,
                                          decodedMsg,
                                          conn,
                                      ))
                t1.start()
            except EOFError:
                print("pickle EOF")

    def handleClientMsg(self, decodedMsg, conn):
        #print("thread msg recvd", decodedMsg)
        if decodedMsg["msg"] == "TRANSFER":
            #add to set
            self.client_sock = conn
            self.set.append(decodedMsg)
            if len(self.set) >= 2:
                self.handlePaxos(decodedMsg)
        elif decodedMsg["msg"] == "PRINTBLOCKCHAIN":
            self.printBlockchain(decodedMsg, conn)
        elif decodedMsg["msg"] == "PRINTBALANCE":
            self.printBalance(decodedMsg, conn)
        elif decodedMsg["msg"] == "PRINTSET":
            self.printSet(decodedMsg, conn)
        elif decodedMsg["msg"] == "CRASH":
            msg = {}
            msg["msg"] = "CRASH-ACK"
            encMsg = pickle.dumps(msg)
            conn.sendall(encMsg)
            #conn.shutdown(socket.SHUT_RDWR)
            print("Emulating server crash.")
            #conn.close()
            os._exit(1)
        elif decodedMsg["msg"] == "RESYNC":
            print("Received RESYNC request from ", decodedMsg["src-name"])
            startIndex = decodedMsg["cur-depth"]
            partialBlockchain = []
            if (startIndex is None):
                startIndex = 0
            else:
                startIndex = startIndex + 1
            for x in range(startIndex, len(self.blockchain)):
                b = self.blockchain[x]
                copiedB = Block(b.tx1, b.tx2, b.prevHash, b.depth, b.nonce)
                partialBlockchain.append(copiedB)

            encMsg = pickle.dumps(createResyncAck(self, partialBlockchain))
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            try:
                s.connect(
                    (self.globalConfig[decodedMsg["src-name"]]["ip-addr"],
                     self.globalConfig[decodedMsg["src-name"]]["port"]))
                time.sleep(randDelay())
                s.sendall(encMsg)
            except socket.error as sock_err:
                if (sock_err.errno == socket.errno.ECONNREFUSED):
                    print("Server " + proc["name"] + " unreachable.")
        elif decodedMsg["msg"] == "RESYNC-ACK":
            #print("Received blockchain from ", decodedMsg["src-name"])
            partialBlockchain = decodedMsg["blockchain"]
            lock.acquire()
            for b in partialBlockchain:
                if (b.depth == len(self.blockchain)):
                    self.blockchain.append(b)
            lock.release()
            if (len(self.set) >= 2):
                x = {}
                x["msg"] = "RETRY"
                thp = threading.Thread(target=Server.handlePaxos,
                                       args=(
                                           self,
                                           x,
                                       ))
                thp.start()
        else:
            self.handlePaxos(decodedMsg)

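    # Paxos driver: dispatches PREPARE / PREP-ACK / ACCEPT / ACCEPT-ACK /
    # DECISION messages. Ballot numbers carry both a sequence number and the
    # blockchain depth they target, so proposals for other depths are ignored
    # and a lagging replica asks for a RESYNC instead.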
    def handlePaxos(self, decodedMsg):
        lock.acquire()
        if (decodedMsg["msg"] == "TRANSFER"
                and self.inPaxos == False) or (decodedMsg["msg"] == "RETRY"
                                               and self.inPaxos == False):
            if (len(self.set) >= 2):
                self.inPaxos = True
                if (self.checkProposeReady() == False):
                    tempMsg = createServerRes(
                        self.config, decodedMsg,
                        msgFormatTrans(self.set[0]) + " and " +
                        msgFormatTrans(self.set[1]) +
                        " have been added to the blockchain.", "TRANSFER-ACK")
                    encMsg = pickle.dumps(tempMsg)
                    self.client_sock.sendall(encMsg)
                    self.inPaxos = False
        lock.release()
        if decodedMsg["msg"] == "PREPARE":
            if (decodedMsg["bal-num"].depth > len(self.blockchain)):
                self.askResync()
            if (decodedMsg["bal-num"] is not None
                    and decodedMsg["bal-num"].depth == len(self.blockchain)):
                if (decodedMsg["src-name"] != self.config["name"]
                        and self.proposer.balNum is not None
                        and decodedMsg["bal-num"].seqNum >
                        self.proposer.curSeqNum):
                    self.proposer.curSeqNum = decodedMsg["bal-num"].seqNum
                self.acceptor.recvPrepare(decodedMsg)
                if (self.inPaxos == True):
                    prevLen = len(self.blockchain)
                    time.sleep(15)
                    lock.acquire()
                    if (prevLen == len(self.blockchain)):
                        if (self.inPaxos == True):
                            self.inPaxos = False
                            print("Timed-out. Retrying. Prev len was ",
                                  prevLen, " but len blockchain now ",
                                  len(self.blockchain))
                            self.proposer.curSeqNum += 1
                            x = {}
                            x["msg"] = "RETRY"
                            thp = threading.Thread(target=Server.handlePaxos,
                                                   args=(
                                                       self,
                                                       x,
                                                   ))
                            thp.start()
                    lock.release()
            elif decodedMsg["bal-num"] and (
                (len(self.blockchain) == 0 and decodedMsg["bal-num"].depth > 0)
                    or (len(self.blockchain) < decodedMsg["bal-num"].depth)):
                self.askResync()
        elif decodedMsg["msg"] == "PREP-ACK":
            if decodedMsg["accept-num"] is None or (
                    decodedMsg["accept-num"] is not None and
                    decodedMsg["accept-num"].depth == len(self.blockchain)):
                self.proposer.handlePrepAck(decodedMsg)
        elif decodedMsg["msg"] == "ACCEPT":
            if (decodedMsg["bal-num"] is not None
                    and decodedMsg["bal-num"].depth == len(self.blockchain)):
                if (decodedMsg["src-name"] != self.config["name"]
                        and self.proposer.balNum is not None
                        and decodedMsg["bal-num"] >= self.proposer.balNum):
                    self.proposer.curSeqNum = decodedMsg["bal-num"].seqNum
                self.acceptor.recvAccept(decodedMsg)
        elif decodedMsg["msg"] == "ACCEPT-ACK":
            if decodedMsg["accept-num"] is None or (
                    decodedMsg["accept-num"] is not None and
                    decodedMsg["accept-num"].depth == len(self.blockchain)):
                self.proposer.handleAcceptAck(decodedMsg)
        elif decodedMsg["msg"] == "DECISION":
            if (decodedMsg["bal-num"] is not None
                    and decodedMsg["bal-num"].depth == len(self.blockchain)):
                self.proposer.curSeqNum = 0
                self.handleDecision(decodedMsg)
                x = {}
                x["msg"] = "RETRY"
                self.handlePaxos(x)
            elif decodedMsg["bal-num"] and (
                (len(self.blockchain) == 0 and decodedMsg["bal-num"].depth > 0)
                    or (len(self.blockchain) < decodedMsg["bal-num"].depth)):
                self.askResync()

    def askResync(self):
        print("Sending RESYNC request to all servers.")
        encMsg = pickle.dumps(createResyncRequest(self))
        self.broadcast(encMsg)

    def broadcast(self, m):
        threads = []
        for s in servers:
            if (s in PARTITION[self.config["name"]]):
                t = threading.Thread(
                    target=Server.randDelayMsg,
                    args=(self, m, self.globalConfig[s]),
                )
                threads.append(t)
            else:
                print("NW partition - cannot speak to ", s)
        for t in threads:
            t.start()
        for t in threads:
            t.join()

    def randDelayMsg(self, m, proc):
        b = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            b.connect((proc["ip-addr"], proc["port"]))
            time.sleep(randDelay())
            b.sendall(m)
            b.close()
        except socket.error as sock_err:
            if (sock_err.errno == socket.errno.ECONNREFUSED):
                pass

    def checkProposeReady(self):
        block = self.transactionCheck()
        if block is not None:
            self.createBallotThread(block)
            return True
        return False

    def transactionCheck(self):
        val = None
        if (len(self.set) >= 2):
            t1 = msgFormatTrans(self.set[0])
            t2 = msgFormatTrans(self.set[1])
            if (self.validateTrans(self.set[0], self.set[1]) == True):
                val = self.mineBlock(t1, t2)
            else:
                print("Previous 2 transactions not valid.")
                temp = self.set.pop(0)
                self.set.append(temp)
                temp = self.set.pop(0)
                self.set.append(temp)
        return val

    def calcBalance(self):
        # return dict of 5 balances
        balance = {
            'A': self.init_balance,
            'B': self.init_balance,
            'C': self.init_balance,
            'D': self.init_balance,
            'E': self.init_balance
        }
        for b in self.blockchain:
            t1 = transFormatDict(b.tx1)
            t2 = transFormatDict(b.tx2)
            balance[t1["sender"]] = balance[t1["sender"]] - int(t1["amount"])
            balance[t2["sender"]] = balance[t2["sender"]] - int(t2["amount"])
            balance[t1["receiver"]] = balance[t1["receiver"]] + int(
                t1["amount"])
            balance[t2["receiver"]] = balance[t2["receiver"]] + int(
                t2["amount"])
        return balance

    def validateTrans(self, t1, t2):
        balance = self.calcBalance()

        return (balance[t1["sender"]] - t1["amount"] - t2["amount"] >= 0)

    def calcPrevHash(self, b):
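        # A block's hash is SHA-256 over its two transactions plus the nonce
        # found by mine(); mineBlock() uses it as the prev-hash of the next block.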
        s = str(b.tx1 + b.tx2 + b.nonce)
        shaHash = hashlib.sha256(s.encode())
        digest = shaHash.hexdigest()
        return digest

    def mineBlock(self, t1, t2):
        prevHash = None
        depth = 0
        if (len(self.blockchain) > 0):
            prevHash = self.calcPrevHash(self.blockchain[-1])
            depth = len(self.blockchain)
        b = Block(t1, t2, prevHash, depth)
        b.mine()
        return b

    def createBallotThread(self, block):
        self.proposer.createBallot(block, len(self.blockchain))

    def handleDecision(self, dMsg):
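        # A DECISION at the current depth commits the block: ack the waiting
        # client if the decided transactions match the head of our set,
        # persist the chain to disk, and retry Paxos for any pending pair.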
        self.acceptor.recvDecision(dMsg)
        if (dMsg["bal-num"].depth == len(self.blockchain)):
            self.inPaxos = False
            if (len(self.set) >= 2
                    and dMsg["val"].tx1 == msgFormatTrans(self.set[0])
                    and dMsg["val"].tx2 == msgFormatTrans(self.set[1])):
                tempMsg = createServerRes(
                    self.config, dMsg,
                    msgFormatTrans(self.set[0]) + " and " +
                    msgFormatTrans(self.set[1]) +
                    " have been committed to the blockchain.", "TRANSFER-ACK")
                encMsg = pickle.dumps(tempMsg)
                self.client_sock.sendall(encMsg)
                # pop the first 2 transactions: they were just committed
                self.set.pop(0)
                self.set.pop(0)
                print("Popped transactions from set.")
            self.blockchain.append(dMsg["val"])
            with open(self.config["name"] + "_blockchain.txt", "wb") as f:
                print("Saving current blockchain to disk.")
                pickle.dump(self.blockchain, f)
            print("New blockchain length: ", len(self.blockchain))
            print("Committing block to blockchain. Block: \n", dMsg["val"])
            if (len(self.set) >= 2):
                x = {}
                x["msg"] = "RETRY"
                self.handlePaxos(x)

        else:
            print("Not committing block: block depth < current blockchain depth.")

    def printBlockchain(self, dMsg, conn):
        msg = createServerRes(self.config, dMsg, self.blockchain,
                              "BLOCKCHAIN-ACK")
        encMsg = pickle.dumps(msg)
        conn.sendall(encMsg)

    def printBalance(self, dMsg, conn):
        balance = self.calcBalance()
        msg = createServerRes(self.config, dMsg, balance, "BALANCE-ACK")
        encMsg = pickle.dumps(msg)
        conn.sendall(encMsg)

    def printSet(self, dMsg, conn):
        #print("printing set")
        setList = []
        for tran in self.set:
            setList.append(msgFormatTrans(tran))
        msg = createServerRes(self.config, dMsg, setList, "SET-ACK")
        encMsg = pickle.dumps(msg)
        conn.sendall(encMsg)
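The server above exchanges pickled dict messages over TCP; for the print commands, handleClientMsg dispatches on the "msg" field alone, and createServerRes (defined elsewhere) builds the reply. A minimal hypothetical client for the read-only PRINTBALANCE path (the address, port, and the extra src-name field are assumptions, since the request fields createServerRes reads are not shown here):

import pickle
import socket

# Hypothetical client: send one pickled request and print the pickled reply.
req = {"msg": "PRINTBALANCE", "src-name": "client"}  # src-name is an assumption
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.connect(("127.0.0.1", 5000))  # assumed server address and port
s.sendall(pickle.dumps(req))
reply = pickle.loads(s.recv(4096))  # expect a BALANCE-ACK message dict
print(reply)
s.close()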