Example #1
class GBD:

    FOLDER_MIMETYPE = 'application/vnd.google-apps.folder'
    BLOCK_MIMETYPE = 'application/octet-stream'

    def __init__(self, **config):

        self.config = Config.copy()
        self.config.update(config)

        self.drive = self.build_service()

        self.data_dir = self.get_data_dir()
        self.uuid = hashlib.sha1(self.data_dir).hexdigest()
        self.load_data_dir()

        self.block_size = self.bd_attr['block_size']
        self.block_count = self.bd_attr['block_count']
        self.total_size = self.block_size * self.block_count
        self.mapping = [None] * self.block_count
        self.que = TimedPriorityQueue()
        self.lock = Lock()

        self.running = True
        self.workers = []
        for i in xrange(self.config.get('workers', 8)):
            worker = GBDWorker(self, self.build_service())
            worker.daemon = True
            worker.start()
            self.workers.append(worker)

    ## init

    def build_service(self):
        SCOPES = ['https://www.googleapis.com/auth/drive']
        creds = None
        # The file token.json stores the user's access and refresh tokens; it
        # is created automatically the first time the authorization flow
        # completes.
        if os.path.exists('token.json'):
            creds = Credentials.from_authorized_user_file('token.json', SCOPES)
        # If there are no (valid) credentials available, let the user log in.
        if not creds or not creds.valid:
            if creds and creds.expired and creds.refresh_token:
                creds.refresh(Request())
            else:
                flow = InstalledAppFlow.from_client_secrets_file(
                    'credentials.json', SCOPES)
                creds = flow.run_local_server(port=0)
            # Save the credentials for the next run.
            with open('token.json', 'w') as token:
                token.write(creds.to_json())
        # NOTE: this builds a v3 service, but the rest of the class still makes
        # v2-style calls ('title' queries, 'items', children(), files().insert()).
        return build('drive', 'v3', credentials=creds)

    def get_data_dir(self):

        folder = self.config['gbd_data_folder']
        query_str = "title='{0}'".format(folder)

        results = self.drive.files().list(q=query_str).execute()
        items = filter(lambda x: not x['labels']['trashed'], results['items'])
        if len(items) == 0:
            if not self.config.get('create', False):
                raise RuntimeError("Can't locate `{0}'".format(folder))
            else:
                return self.create_data_dir()
        if len(items) > 1:
            raise AssertionError("{0} results found for `{1}', don't know which to use".format(len(items), folder))

        item = items[0]
        if item['mimeType'] != self.FOLDER_MIMETYPE:
            raise AssertionError("`{0}' is not a folder!! (mimeType={1})".format(folder, item['mimeType']))
        if not item['editable']:
            raise RuntimeError("folder `{0}' is readonly!".format(folder))

        return item['id']

    def create_data_dir(self):

        folder = self.config['gbd_data_folder']
        body = {
            'title': folder,
            'parents': ['root'],
            'mimeType': self.FOLDER_MIMETYPE,
        }
        result = self.drive.files().insert(body=body).execute()

        if not result:
            raise RuntimeError("Can't create folder `{0}'".format(folder))
        return result['id']

    def load_data_dir(self):

        query_str = "title='config'"
        results = self.drive.children().list(folderId=self.data_dir, q=query_str).execute()
        if len(results['items']) == 0:
            self.init_data_dir()
            return
        if len(results['items']) > 1:
            raise AssertionError("config file should be unique")

        fileId = results['items'][0]['id']
        results = self.drive.files().get_media(fileId=fileId).execute()
        assert results

        self.bd_attr = json.loads(results)
        if self.bd_attr['version'] != Metadata['version']:
            raise AssertionError("Version mismatch: {0} vs {1}", Metadata['version'], self.bd_attr['version'])

    def init_data_dir(self):

        logger.info("Initializing data dir")

        if 'default_block_size' in self.config:
            block_size = int(self.config['default_block_size'])
        else:
            block_size = int(raw_input("Desired block size: "))
        if 'default_total_size' in self.config:
            total_size = int(self.config['default_total_size'])
        else:
            total_size = int(raw_input("Total size: "))
        if total_size < block_size:
            raise ValueError("block_size should not be bigger than total_size.")

        used_size = total_size // block_size * block_size
        if used_size != total_size:
            logger.info("Only using {0} bytes instead of {1}".format(used_size, total_size))

        self.bd_attr = {
            'version': Metadata['version'],
            'block_size': block_size,
            'block_count': used_size // block_size,
        }
        body = {
            'title': 'config',
            'description': 'config file for gbd',
            'mimeType': 'application/json',
            'parents': [{'id': self.data_dir}],
        }
        media_body = MediaInMemoryUpload(json.dumps(self.bd_attr), mimetype='application/json', resumable=False)

        self.drive.files().insert(body=body, media_body=media_body).execute()
    
    ## function

    def read_block(self, idx, cb=None, pri=TimedPriorityQueue.PRI_NORMAL):
        assert 0 <= idx < self.block_count
        if cb:
            self.que.put((idx, None, cb), pri)
        else:
            return self.sync_io(idx, None, pri)

    def write_block(self, idx, data, cb=None, pri=TimedPriorityQueue.PRI_NORMAL):
        assert 0 <= idx < self.block_count
        assert data and len(data) == self.block_size
        if cb:
            self.que.put((idx, data, cb), pri)
        else:
            return self.sync_io(idx, data, pri)

    def sync(self):
        logger.info("Syncing...")
        self.que.join()

    def end(self, force):
        if not force:
            self.sync()
        logger.info("End GBD")

    ## helper

    @classmethod
    def idx_to_name(cls, idx):
        return "gbd_b" + str(idx)

    def block_id(self, idx):
        with self.lock:
            if idx >= self.block_count or idx < 0:
                raise IndexError("Can't map idx {0}".format(idx))
            if self.mapping[idx] is None:
                query_str = "title='{0}'".format(self.idx_to_name(idx))
                results = self.drive.children().list(folderId=self.data_dir, q=query_str).execute()
                if len(results['items']) == 1:
                    self.mapping[idx] = results['items'][0]['id']
                else:
                    assert len(results['items']) == 0
            return self.mapping[idx]

    def new_block(self, idx, data=None):

        with self.lock:

            if idx >= self.block_count or idx < 0:
                raise ValueError("Index out of bounds")
            if self.mapping[idx] is not None:
                raise ValueError("Non-empty mapping @ {0}".format(idx))
            if data is not None:
                assert len(data) == self.block_size
            else:
                data = "\0" * self.block_size

            body = {
                'title': self.idx_to_name(idx),
                'mimeType': self.BLOCK_MIMETYPE,
                'parents': [{'id': self.data_dir}],
            }
            media_body = MediaInMemoryUpload(data, mimetype=self.BLOCK_MIMETYPE, resumable=False)

            result = self.drive.files().insert(body=body, media_body=media_body).execute()
            self.mapping[idx] = result['id']
            return result

    def sync_io(self, idx, data, pri):

        ret = []
        sem = Semaphore(0)
        def mycb(*param):
            ret.append(param)
            sem.release()

        self.que.put((idx, data, mycb), pri)
        sem.acquire()

        err, data = ret.pop()
        if err:
            raise err
        else:
            return data
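
For orientation, a minimal usage sketch follows. It is hypothetical: it assumes the module-level Config, TimedPriorityQueue, and GBDWorker definitions from the rest of gbd.py, an already-authorized token.json, and that the (unshown) workers service queued requests.

# Hypothetical usage sketch (Python 2, matching the listings above); the
# folder name and sizes are placeholders.
gbd = GBD(gbd_data_folder='gbd-data', create=True, workers=4,
          default_block_size=1 << 20, default_total_size=64 << 20)

idx = 0
payload = "x" * gbd.block_size         # writes must span exactly one block
if gbd.block_id(idx) is None:
    gbd.new_block(idx, payload)        # allocate the backing Drive file
else:
    gbd.write_block(idx, payload)      # no callback given, so this blocks
assert gbd.read_block(idx) == payload  # blocking read through sync_io()

gbd.end(False)                         # sync() outstanding work, then stop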
Example #2
File: gbd.py  Project: coquelicot/GBD
class GBD:

    FOLDER_MIMETYPE = 'application/vnd.google-apps.folder'
    BLOCK_MIMETYPE = 'application/octet-stream'

    def __init__(self, **config):

        self.config = Config.copy()
        self.config.update(config)

        self.auth_mgr = AuthManager(
            self.config['appname'],
            self.config['oauth_client_id'],
            self.config['oauth_client_secret'],
            self.config['oauth_scope'],
            self.config['oauth_redirect_uri'])
        self.drive = self.build_service()

        self.data_dir = self.get_data_dir()
        self.uuid = hashlib.sha1(self.data_dir).hexdigest()
        self.load_data_dir()

        self.block_size = self.bd_attr['block_size']
        self.block_count = self.bd_attr['block_count']
        self.total_size = self.block_size * self.block_count
        self.mapping = [None] * self.block_count
        self.que = TimedPriorityQueue()
        self.lock = Lock()

        self.running = True
        self.workers = []
        for i in xrange(self.config.get('workers', 8)):
            worker = GBDWorker(self, self.build_service())
            worker.daemon = True
            worker.start()
            self.workers.append(worker)

    ## init

    def build_service(self):
        return build_service('drive', 'v2', http=self.auth_mgr.get_auth_http())

    def get_data_dir(self):

        folder = self.config['gbd_data_folder']
        query_str = "title='{0}'".format(folder)

        results = self.drive.files().list(q=query_str).execute()
        items = filter(lambda x: not x['labels']['trashed'], results['items'])
        if len(items) == 0:
            if not self.config.get('create', False):
                raise RuntimeError("Can't locate `{0}'".format(folder))
            else:
                return self.create_data_dir()
        if len(items) > 1:
            raise AssertionError("{0} results found for `{1}', don't know which to use".format(len(items), folder))

        item = items[0]
        if item['mimeType'] != self.FOLDER_MIMETYPE:
            raise AssertionError("`{0}' is not a folder!! (mimeType={1})".format(folder, item['mimeType']))
        if not item['editable']:
            raise RuntimeError("folder `{0}' is readonly!".format(folder))

        return item['id']

    def create_data_dir(self):

        folder = self.config['gbd_data_folder']
        body = {
            'title': folder,
            'parents': ['root'],
            'mimeType': self.FOLDER_MIMETYPE,
        }
        result = self.drive.files().insert(body=body).execute()

        if not result:
            raise RuntimeError("Can't create folder `{0}'".format(folder))
        return result['id']

    def load_data_dir(self):

        query_str = "title='config'"
        results = self.drive.children().list(folderId=self.data_dir, q=query_str).execute()
        if len(results['items']) == 0:
            self.init_data_dir()
            return
        if len(results['items']) > 1:
            raise AssertionError("config file should be unique")

        fileId = results['items'][0]['id']
        results = self.drive.files().get_media(fileId=fileId).execute()
        assert results

        self.bd_attr = json.loads(results)
        if self.bd_attr['version'] != Metadata['version']:
            raise AssertionError("Version mismatch: {0} vs {1}", Metadata['version'], self.bd_attr['version'])

    def init_data_dir(self):

        logger.info("Initializing data dir")

        if 'default_block_size' in self.config:
            block_size = int(self.config['default_block_size'])
        else:
            block_size = int(raw_input("Desired block size: "))
        if 'default_total_size' in self.config:
            total_size = int(self.config['default_total_size'])
        else:
            total_size = int(raw_input("Total size: "))
        if total_size < block_size:
            raise ValueError("block_size should not be bigger than total_size.")

        used_size = total_size // block_size * block_size
        if used_size != total_size:
            logger.info("Only using {0} bytes instead of {1}".format(used_size, total_size))

        self.bd_attr = {
            'version': Metadata['version'],
            'block_size': block_size,
            'block_count': used_size // block_size,
        }
        body = {
            'title': 'config',
            'description': 'config file for gbd',
            'mimeType': 'application/json',
            'parents': [{'id': self.data_dir}],
        }
        media_body = MediaInMemoryUpload(json.dumps(self.bd_attr), mimetype='application/json', resumable=False)

        self.drive.files().insert(body=body, media_body=media_body).execute()
    
    ## function

    def read_block(self, idx, cb=None, pri=TimedPriorityQueue.PRI_NORMAL):
        assert 0 <= idx < self.block_count
        if cb:
            self.que.put((idx, None, cb), pri)
        else:
            return self.sync_io(idx, None, pri)

    def write_block(self, idx, data, cb=None, pri=TimedPriorityQueue.PRI_NORMAL):
        assert 0 <= idx < self.block_count
        assert data and len(data) == self.block_size
        if cb:
            self.que.put((idx, data, cb), pri)
        else:
            return self.sync_io(idx, data, pri)

    def sync(self):
        logger.info("Syncing...")
        self.que.join()

    def end(self, force):
        if not force:
            self.sync()
        logger.info("End GBD")

    ## helper

    @classmethod
    def idx_to_name(cls, idx):
        return "gbd_b" + str(idx)

    def block_id(self, idx):
        with self.lock:
            if idx >= self.block_count or idx < 0:
                raise IndexError("Can't map idx {0}".format(idx))
            if self.mapping[idx] is None:
                query_str = "title='{0}'".format(self.idx_to_name(idx))
                results = self.drive.children().list(folderId=self.data_dir, q=query_str).execute()
                if len(results['items']) == 1:
                    self.mapping[idx] = results['items'][0]['id']
                else:
                    assert len(results['items']) == 0
            return self.mapping[idx]

    def new_block(self, idx, data=None):

        with self.lock:

            if idx >= self.block_count or idx < 0:
                raise ValueError("Index out of bounds")
            if self.mapping[idx] is not None:
                raise ValueError("Non-empty mapping @ {0}".format(idx))
            if data is not None:
                assert len(data) == self.block_size
            else:
                data = "\0" * self.block_size

            body = {
                'title': self.idx_to_name(idx),
                'mimeType': self.BLOCK_MIMETYPE,
                'parents': [{'id': self.data_dir}],
            }
            media_body = MediaInMemoryUpload(data, mimetype=self.BLOCK_MIMETYPE, resumable=False)

            result = self.drive.files().insert(body=body, media_body=media_body).execute()
            self.mapping[idx] = result['id']
            return result

    def sync_io(self, idx, data, pri):

        ret = []
        sem = Semaphore(0)
        def mycb(*param):
            ret.append(param)
            sem.release()

        self.que.put((idx, data, mycb), pri)
        sem.acquire()

        err, data = ret.pop()
        if err:
            raise err
        else:
            return data
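
This variant authenticates through the project's AuthManager (not shown in these listings). A hypothetical construction, with placeholder values; only the config key names come from the __init__ above:

gbd = GBD(
    appname='gbd',
    oauth_client_id='<client-id>.apps.googleusercontent.com',
    oauth_client_secret='<client-secret>',
    oauth_scope='https://www.googleapis.com/auth/drive',
    oauth_redirect_uri='urn:ietf:wg:oauth:2.0:oob',
    gbd_data_folder='gbd-data',
    create=True)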
Example #3
class CachedGBD:

    EMPTY = 0xffffffffffffffff

    def __init__(self, cache_file, dirty=False, *args, **kargs):

        if 'workers' not in kargs:
            kargs['workers'] = 16

        self.done = False

        self.gbd = GBD(*args, **kargs)
        self.uuid = self.gbd.uuid
        self.block_size = self.gbd.block_size
        self.block_count = self.gbd.block_count
        self.total_size = self.gbd.total_size

        self.cache = open(cache_file, 'r+b')
        self.cache_lock = Lock()
        self.entry_count = self.calc_entry_count()
        self.clean_que = RLUQueue(self.entry_count)
        self.dirty_que = RLUQueue(self.entry_count)
        self.last_modify = [0] * self.entry_count
        self.map = {}
        self.rmap = [self.EMPTY] * self.entry_count
        self.map_lock = Lock()
        self.load_cache(dirty)

        self.wb_sem = Semaphore(8)
        self.wb_daemon = Thread(target=self.do_writeback)
        self.wb_daemon.daemon = True
        self.wb_daemon.start()

        self.pull_que = TimedPriorityQueue()
        self.dque_lock = Lock()
        self.pull_delay_que = {}
        self.pull_daemon = Thread(target=self.do_pull)
        self.pull_daemon.daemon = True
        self.pull_daemon.start()

        self.done = True

    ## init

    def load_cache(self, dirty=True):

        self.cache.seek(0, os.SEEK_SET)
        cache_uuid = self.cache.read(len(self.uuid))
        if cache_uuid == "\0" * len(self.uuid):
            logger.info("The cache file is empty, not loading anything")
            for i in xrange(self.entry_count):
                self.clean_que.put(i)
            return
        if cache_uuid != self.uuid:
            raise AssertionError("It's not the correct cache device. (uuid mismatch)")

        self.cache.seek(len(self.uuid), os.SEEK_SET)
        record = self.cache.read(8 * self.entry_count)
        for i in xrange(0, self.entry_count):
            entry = struct.unpack("!Q", record[i*8:i*8+8])[0]
            if entry != self.EMPTY:
                assert entry < self.block_count and entry not in self.map
                self.map[entry] = i
                self.rmap[i] = entry
                if dirty:
                    self.dirty_que.put(i)
                else:
                    self.clean_que.put(i)
                logger.debug("Map {0} => {1}".format(entry, i))
            else:
                self.clean_que.put(i)

    ## interface

    def read(self, offset, length, callback=None):

        assert 0 <= offset < offset + length <= self.total_size

        idxl = offset // self.block_size
        idxr = (offset + length - 1) // self.block_size

        cv = Condition()
        state = [idxr + 1 - idxl, None]
        data_list = [None] * state[0]

        for idx in xrange(idxl, idxr + 1):

            rngl = max(offset, idx * self.block_size)
            rngr = min(offset + length, (idx + 1) * self.block_size)
            shift = rngl % self.block_size
            to_read = rngr - rngl

            def gcb(idx, shift, to_read):
                def cb(err, obj, data):
                    with cv:
                        if state[1] is not None:
                            return False
                        if err:
                            state[1] = err
                            if callback:
                                callback(err, None)
                            else:
                                cv.notify()
                            return False
                        if to_read == self.block_size:
                            data_list[idx - idxl] = data
                        else:
                            with self.cache_lock:
                                self.cache.seek(self.calc_offset(obj) + shift, os.SEEK_SET)
                                data_list[idx - idxl] = self.cache.read(to_read)
                        state[0] = state[0] - 1
                        if state[0] == 0:
                            if callback is None:
                                cv.notify()
                            else:
                                callback(None, ''.join(data_list))
                    return False
                return cb
            self.pull(idx, read_data=(to_read == self.block_size), callback=gcb(idx, shift, to_read))

        if callback is None:
            cv.acquire()
            while state[0] != 0:
                cv.wait()
            cv.release()
            if state[1]:
                raise state[1]
            else:
                assert all(x is not None for x in data_list)
                return ''.join(data_list)

    def write(self, offset, data, callback=None):

        assert 0 <= offset < offset + len(data) <= self.total_size

        idxl = offset // self.block_size
        idxr = (offset + len(data) - 1) // self.block_size

        lock = Lock()
        state = [idxr + 1 - idxl, None]

        for idx in xrange(idxl, idxr + 1):

            rngl = max(offset, idx * self.block_size)
            rngr = min(offset + len(data), (idx + 1) * self.block_size)
            ndata = data[rngl-offset:rngr-offset]
            shift = rngl % self.block_size

            def gcb(shift, ndata):
                def cb(err, obj, _):
                    with lock:
                        if state[1] is not None:
                            return False
                        if err:
                            state[1] = err
                            if callback:
                                callback(err)
                            return False
                        with self.cache_lock:
                            self.cache.seek(self.calc_offset(obj) + shift, os.SEEK_SET)
                            self.cache.write(ndata)
                        state[0] = state[0] - 1
                        if state[0] == 0 and callback:
                            callback(None)
                    return True
                return cb
            cb = gcb(shift, ndata)

            if len(ndata) == self.block_size:
                obj = self.pull(idx, pull_data=False, callback=cb)
            else:
                obj = self.pull(idx, callback=cb)

    def save_map(self):

        def pack(ull):
            # Big-endian 64-bit encoding, equivalent to struct.pack("!Q", ull).
            return ''.join(chr((ull >> i) % 256) for i in xrange(56, -1, -8))

        logger.info("Saving map...")
        with self.cache_lock:
            self.cache.seek(0, 0)
            self.cache.write(self.uuid)
            self.cache.write(''.join(pack(ent) for ent in self.rmap))
            self.cache.close()

    def sync(self):
        logger.info("Flushing all request to gbd...")
        while True:
            with self.dque_lock:
                if self.dirty_que.empty() and self.pull_que.empty() and len(self.pull_delay_que) == 0:
                    break
            time.sleep(1)
        self.gbd.sync()

    def end(self, force=False):
        if not force:
            self.sync()
        self.gbd.end(True)
        self.save_map()
        logger.info("End CachedGBD")

    ## helper

    def calc_entry_count(self):
        self.cache.seek(0, os.SEEK_END)
        entry_count = (self.cache.tell() - len(self.uuid)) // (self.block_size + 8)
        assert entry_count > 0
        return entry_count

    def calc_offset(self, idx):
        return len(self.uuid) + 8 * self.entry_count + idx * self.block_size

    def pull(self, idx, pull_data=True, read_data=False, callback=None):
        assert 0 <= idx < self.block_count
        assert pull_data or not read_data
        self.pull_que.put((idx, pull_data, read_data, callback))

    ## daemon

    def check_delay_pull(self, idx):
        with self.dque_lock:
            if idx in self.pull_delay_que:
                pack = self.pull_delay_que[idx].get()
                logging.debug("Put pack {0}".format(pack))
                self.pull_que.put(pack, TimedPriorityQueue.PRI_HIGH)
                if self.pull_delay_que[idx].empty():
                    del self.pull_delay_que[idx]

    def do_pull(self):

        while True:

            pack = self.pull_que.get()

            data = None
            dobj = None  # must be reset here: it is only assigned on the re-map path below
            modify = False
            idx, pull_data, read_data, callback = pack

            with self.map_lock:
                if idx in self.map:
                    new_block = False
                    obj = self.map[idx]
                    with self.dque_lock:
                        cobj = self.clean_que.pop(obj)
                        dobj = self.dirty_que.pop(obj)
                        assert cobj is None or dobj is None
                        if cobj is None and dobj is None:
                            logging.debug("Delay {0}".format(pack))
                            if idx not in self.pull_delay_que:
                                self.pull_delay_que[idx] = Queue()
                            self.pull_delay_que[idx].put(pack)
                            continue
                else:
                    new_block = True
                    obj = self.clean_que.get()
                    if self.rmap[obj] != self.EMPTY:
                        del self.map[self.rmap[obj]]
                    self.rmap[obj] = idx
                    self.map[idx] = obj

            if not new_block:
                if read_data:
                    with self.cache_lock:
                        self.cache.seek(self.calc_offset(obj), os.SEEK_SET)
                        data = self.cache.read(self.block_size)

            else:
                logger.debug("Pull {0} => {1}".format(idx, obj))
                if pull_data or read_data:
                    def gcb(idx, obj, callback):
                        def cb(err, data):
                            if err:
                                logger.error("Pull {0} => {1}: Fail".format(idx, obj))
                                raise NotImplementedError("Need to propagate pull error")
                            else:
                                logger.debug("Pull {0} => {1}: Check = {2}".format(idx, obj, hashlib.sha1(data).hexdigest()))
                                with self.cache_lock:
                                    self.cache.seek(self.calc_offset(obj), os.SEEK_SET)
                                    self.cache.write(data)
                                if callback and callback(None, obj, data):
                                    self.last_modify[obj] = time.time()
                                    self.dirty_que.put(obj)
                                else:
                                    self.clean_que.put(obj)
                                self.check_delay_pull(idx)
                                logger.debug("Pull {0} => {1}: End".format(idx, obj))
                        return cb
                    self.gbd.read_block(idx, gcb(idx, obj, callback))
                    continue
                else:
                    modify = True

            assert data is None or len(data) == self.block_size

            if callback and callback(None, obj, data):
                modify = True
            if dobj is not None or modify:
                if modify:
                    self.last_modify[obj] = time.time()
                self.dirty_que.put(obj)
            else:
                self.clean_que.put(obj)
            self.check_delay_pull(idx)

    def do_writeback(self):

        delay = 0.5

        while True:

            self.wb_sem.acquire()
            ent = self.dirty_que.get()

            to_sleep = self.last_modify[ent] + delay - time.time()
            if to_sleep > 0:
                self.wb_sem.release()
                logging.debug("Sleep wb {0}".format(to_sleep))
                self.dirty_que.unget(ent)
                time.sleep(to_sleep)
                continue

            with self.map_lock:
                idx = self.rmap[ent]
                assert self.map[idx] == ent

            logger.debug("Collected {0}".format(ent))
            with self.cache_lock:
                self.cache.seek(self.calc_offset(ent), os.SEEK_SET)
                data = self.cache.read(self.block_size)

            logger.debug("Push {0} <= {1}: Check = {2}".format(idx, ent, hashlib.sha1(data).hexdigest()))

            def gcb(idx, ent):
                def cb(err, _):
                    if err:
                        logger.warning("Push {0} <= {1}: Fail".format(idx, ent))
                        self.dirty_que.put(ent)
                    else:
                        logger.debug("Push {0} <= {1}: Success".format(idx, ent))
                        self.clean_que.put(ent)
                    self.check_delay_pull(idx)
                    self.wb_sem.release()
                return cb
            self.gbd.write_block(idx, data, gcb(idx, ent), TimedPriorityQueue.PRI_LOW)
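
To close the loop, a hypothetical end-to-end sketch (Python 2, like the listings; the file name and config values are placeholders). Per calc_entry_count() and calc_offset(), the cache file layout is a 40-byte sha1-hexdigest header, then 8 mapping bytes per slot, then block_size bytes per slot, so a cache with E slots needs 40 + E * (8 + block_size) bytes:

# Hypothetical setup sketch: pre-allocate a zero-filled cache file sized for
# `entries` slots, then run one write/read round trip through the cache.
block_size = 1 << 20                  # must match the remote bd_attr
entries = 256                         # number of locally cached blocks
uuid_len = 40                         # len(hashlib.sha1(...).hexdigest())

with open('gbd.cache', 'wb') as f:    # all zeroes => "empty cache" on load
    f.truncate(uuid_len + entries * (8 + block_size))

cgbd = CachedGBD('gbd.cache', dirty=False,
                 gbd_data_folder='gbd-data', create=True,
                 default_block_size=block_size,
                 default_total_size=64 * block_size)
cgbd.write(0, 'A' * 512)              # asynchronous; lands in the cache file
cgbd.sync()                           # wait until it is pushed to Drive
print repr(cgbd.read(0, 512))         # blocking read through the cache
cgbd.end()                            # flush, persist the map, and stop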