示例#1
0
class GBD:

    FOLDER_MIMETYPE = 'application/vnd.google-apps.folder'
    BLOCK_MIMETYPE = 'application/octet-stream'

    def __init__(self, **config):

        self.config = Config.copy()
        self.config.update(config)

        self.drive = self.build_service()

        self.data_dir = self.get_data_dir()
        self.uuid = hashlib.sha1(self.data_dir).hexdigest()
        self.load_data_dir()

        self.block_size = self.bd_attr['block_size']
        self.block_count = self.bd_attr['block_count']
        self.total_size = self.block_size * self.block_count
        self.mapping = [None] * self.block_count
        self.que = TimedPriorityQueue()
        self.lock = Lock()

        self.running = True
        self.workers = []
        for i in xrange(self.config.get('workers', 8)):
            worker = GBDWorker(self, self.build_service())
            worker.daemon = True
            worker.start()
            self.workers.append(worker)

    ## init

    def build_service(self):
        SCOPES = ['https://www.googleapis.com/auth/drive']
        creds = None
    # The file token.json stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the first
    # time.
        if os.path.exists('token.json'):
            creds = Credentials.from_authorized_user_file('token.json', SCOPES)
    # If there are no (valid) credentials available, let the user log in.
        if not creds or not creds.valid:
            if creds and creds.expired and creds.refresh_token:
                creds.refresh(Request())
            else:
                flow = InstalledAppFlow.from_client_secrets_file(
                    'credentials.json', SCOPES)
                creds = flow.run_local_server(port=0)
        # Save the credentials for the next run
            with open('token.json', 'w') as token:
                token.write(creds.to_json())
        return  build('drive', 'v3', credentials=creds)
        #return build_service('drive', 'v2', http=self.auth_mgr.get_auth_http())

    def get_data_dir(self):

        folder = self.config['gbd_data_folder']
        query_str = "title='{0}'".format(folder)

        results = self.drive.files().list(q=query_str).execute()
        items = filter(lambda x: not x['labels']['trashed'], results['items'])
        if len(items) == 0:
            if not self.config.get('create', False):
                raise RuntimeError("Can't locate `{0}'".format(folder))
            else:
                return self.create_data_dir()
        if len(items) > 1:
            raise AssertionError("{0} results found for `{1}', don't know which to use".format(len(items), folder))

        item = items[0]
        if item['mimeType'] != self.FOLDER_MIMETYPE:
            raise AssertionError("`{0}' is not a folder!! (mimeType={1})".format(folder, item['mimeType']))
        if not item['editable']:
            raise RuntimeError("folder `{0}' is readonly!".format(folder))

        return item['id']

    def create_data_dir(self):

        folder = self.config['gbd_data_folder']
        body = {
            'title': folder,
            'parents': ['root'],
            'mimeType': self.FOLDER_MIMETYPE,
        }
        result = self.drive.files().insert(body=body).execute()

        if not result:
            raise RuntimeError("Can't create folder `{0}'".format(folder))
        return result['id']

    def load_data_dir(self):

        query_str = "title='config'"
        results = self.drive.children().list(folderId=self.data_dir, q=query_str).execute()
        if len(results['items']) == 0:
            self.init_data_dir()
            return
        if len(results['items']) > 1:
            raise AssertionError("config file should be unique")

        fileId = results['items'][0]['id']
        results = self.drive.files().get_media(fileId=fileId).execute()
        assert results

        self.bd_attr = json.loads(results)
        if self.bd_attr['version'] != Metadata['version']:
            raise AssertionError("Version mismatch: {0} vs {1}", Metadata['version'], self.bd_attr['version'])

    def init_data_dir(self):

        logger.info("Initializing data dir")

        if 'default_block_size' in self.config:
            block_size = int(self.config['default_block_size'])
        else:
            block_size = int(raw_input("Desired block size: "))
        if 'default_total_size' in self.config:
            total_size = int(self.config['default_total_size'])
        else:
            total_size = int(raw_input("Total size: "))
        if total_size < block_size:
            raise ValueError("block_size should not be bigger than total_size.")

        used_size = total_size // block_size * block_size
        if used_size != total_size:
            logger.info("Only using {0} bytes instead of {1}".format(used_size, total_size))

        self.bd_attr = {
            'version': Metadata['version'],
            'block_size': block_size,
            'block_count': used_size // block_size,
        }
        body = {
            'title': 'config',
            'description': 'config file for gbd',
            'mimeType': 'application/json',
            'parents': [{'id': self.data_dir}],
        }
        media_body = MediaInMemoryUpload(json.dumps(self.bd_attr), mimetype='application/json', resumable=False)

        self.drive.files().insert(body=body, media_body=media_body).execute()
    
    ## function

    def read_block(self, idx, cb=None, pri=TimedPriorityQueue.PRI_NORMAL):
        assert 0 <= idx < self.block_count
        if cb:
            self.que.put((idx, None, cb), pri)
        else:
            return self.sync_io(idx, None, pri)

    def write_block(self, idx, data, cb=None, pri=TimedPriorityQueue.PRI_NORMAL):
        assert 0 <= idx < self.block_count
        assert data and len(data) == self.block_size
        if cb:
            self.que.put((idx, data, cb), pri)
        else:
            return self.sync_io(idx, data, pri)

    def sync(self):
        logger.info("Syncing...")
        self.que.join()

    def end(self, force):
        if not force:
            self.sync()
        logger.info("End GBD")

    ## helper

    @classmethod
    def idx_to_name(cls, idx):
        return "gbd_b" + str(idx)

    def block_id(self, idx):
        with self.lock:
            if idx >= self.block_count or idx < 0:
                raise IndexError("Can't map idx {0}".format(idx))
            if self.mapping[idx] is None:
                query_str = "title='{0}'".format(self.idx_to_name(idx))
                results = self.drive.children().list(folderId=self.data_dir, q=query_str).execute()
                if len(results['items']) == 1:
                    self.mapping[idx] = results['items'][0]['id']
                else:
                    assert len(results['items']) == 0
            return self.mapping[idx]

    def new_block(self, idx, data=None):

        with self.lock:

            if idx >= self.block_count or idx < 0:
                raise ValueError("Index out of bound")
            if self.mapping[idx] is not None:
                raise ValueError("None empty mapping @ {0}".format(idx))
            if data is not None:
                assert len(data) == self.block_size
            else:
                data = "\0" * self.block_size

            body = {
                'title': self.idx_to_name(idx),
                'mimeType': self.BLOCK_MIMETYPE,
                'parents': [{'id': self.data_dir}],
            }
            media_body = MediaInMemoryUpload(data, mimetype=self.BLOCK_MIMETYPE, resumable=False)

            result = self.drive.files().insert(body=body, media_body=media_body).execute()
            self.mapping[idx] = result['id']
            return result

    def sync_io(self, idx, data, pri):

        ret = []
        sem = Semaphore(0)
        def mycb(*param):
            ret.append(param)
            sem.release()

        self.que.put((idx, data, mycb), pri)
        sem.acquire()

        err, data = ret.pop()
        if err:
            raise err
        else:
            return data
示例#2
0
文件: gbd.py 项目: coquelicot/GBD
class GBD:

    FOLDER_MIMETYPE = 'application/vnd.google-apps.folder'
    BLOCK_MIMETYPE = 'application/octet-stream'

    def __init__(self, **config):

        self.config = Config.copy()
        self.config.update(config)

        self.auth_mgr = AuthManager(
            self.config['appname'],
            self.config['oauth_client_id'],
            self.config['oauth_client_secret'],
            self.config['oauth_scope'],
            self.config['oauth_redirect_uri'])
        self.drive = self.build_service()

        self.data_dir = self.get_data_dir()
        self.uuid = hashlib.sha1(self.data_dir).hexdigest()
        self.load_data_dir()

        self.block_size = self.bd_attr['block_size']
        self.block_count = self.bd_attr['block_count']
        self.total_size = self.block_size * self.block_count
        self.mapping = [None] * self.block_count
        self.que = TimedPriorityQueue()
        self.lock = Lock()

        self.running = True
        self.workers = []
        for i in xrange(self.config.get('workers', 8)):
            worker = GBDWorker(self, self.build_service())
            worker.daemon = True
            worker.start()
            self.workers.append(worker)

    ## init

    def build_service(self):
        return build_service('drive', 'v2', http=self.auth_mgr.get_auth_http())

    def get_data_dir(self):

        folder = self.config['gbd_data_folder']
        query_str = "title='{0}'".format(folder)

        results = self.drive.files().list(q=query_str).execute()
        items = filter(lambda x: not x['labels']['trashed'], results['items'])
        if len(items) == 0:
            if not self.config.get('create', False):
                raise RuntimeError("Can't locate `{0}'".format(folder))
            else:
                return self.create_data_dir()
        if len(items) > 1:
            raise AssertionError("{0} results found for `{1}', don't know which to use".format(len(items), folder))

        item = items[0]
        if item['mimeType'] != self.FOLDER_MIMETYPE:
            raise AssertionError("`{0}' is not a folder!! (mimeType={1})".format(folder, item['mimeType']))
        if not item['editable']:
            raise RuntimeError("folder `{0}' is readonly!".format(folder))

        return item['id']

    def create_data_dir(self):

        folder = self.config['gbd_data_folder']
        body = {
            'title': folder,
            'parents': ['root'],
            'mimeType': self.FOLDER_MIMETYPE,
        }
        result = self.drive.files().insert(body=body).execute()

        if not result:
            raise RuntimeError("Can't create folder `{0}'".format(folder))
        return result['id']

    def load_data_dir(self):

        query_str = "title='config'"
        results = self.drive.children().list(folderId=self.data_dir, q=query_str).execute()
        if len(results['items']) == 0:
            self.init_data_dir()
            return
        if len(results['items']) > 1:
            raise AssertionError("config file should be unique")

        fileId = results['items'][0]['id']
        results = self.drive.files().get_media(fileId=fileId).execute()
        assert results

        self.bd_attr = json.loads(results)
        if self.bd_attr['version'] != Metadata['version']:
            raise AssertionError("Version mismatch: {0} vs {1}", Metadata['version'], self.bd_attr['version'])

    def init_data_dir(self):

        logger.info("Initializing data dir")

        if 'default_block_size' in self.config:
            block_size = int(self.config['default_block_size'])
        else:
            block_size = int(raw_input("Desired block size: "))
        if 'default_total_size' in self.config:
            total_size = int(self.config['default_total_size'])
        else:
            total_size = int(raw_input("Total size: "))
        if total_size < block_size:
            raise ValueError("block_size should not be bigger than total_size.")

        used_size = total_size // block_size * block_size
        if used_size != total_size:
            logger.info("Only using {0} bytes instead of {1}".format(used_size, total_size))

        self.bd_attr = {
            'version': Metadata['version'],
            'block_size': block_size,
            'block_count': used_size // block_size,
        }
        body = {
            'title': 'config',
            'description': 'config file for gbd',
            'mimeType': 'application/json',
            'parents': [{'id': self.data_dir}],
        }
        media_body = MediaInMemoryUpload(json.dumps(self.bd_attr), mimetype='application/json', resumable=False)

        self.drive.files().insert(body=body, media_body=media_body).execute()
    
    ## function

    def read_block(self, idx, cb=None, pri=TimedPriorityQueue.PRI_NORMAL):
        assert 0 <= idx < self.block_count
        if cb:
            self.que.put((idx, None, cb), pri)
        else:
            return self.sync_io(idx, None, pri)

    def write_block(self, idx, data, cb=None, pri=TimedPriorityQueue.PRI_NORMAL):
        assert 0 <= idx < self.block_count
        assert data and len(data) == self.block_size
        if cb:
            self.que.put((idx, data, cb), pri)
        else:
            return self.sync_io(idx, data, pri)

    def sync(self):
        logger.info("Syncing...")
        self.que.join()

    def end(self, force):
        if not force:
            self.sync()
        logger.info("End GBD")

    ## helper

    @classmethod
    def idx_to_name(cls, idx):
        return "gbd_b" + str(idx)

    def block_id(self, idx):
        with self.lock:
            if idx >= self.block_count or idx < 0:
                raise IndexError("Can't map idx {0}".format(idx))
            if self.mapping[idx] is None:
                query_str = "title='{0}'".format(self.idx_to_name(idx))
                results = self.drive.children().list(folderId=self.data_dir, q=query_str).execute()
                if len(results['items']) == 1:
                    self.mapping[idx] = results['items'][0]['id']
                else:
                    assert len(results['items']) == 0
            return self.mapping[idx]

    def new_block(self, idx, data=None):

        with self.lock:

            if idx >= self.block_count or idx < 0:
                raise ValueError("Index out of bound")
            if self.mapping[idx] is not None:
                raise ValueError("None empty mapping @ {0}".format(idx))
            if data is not None:
                assert len(data) == self.block_size
            else:
                data = "\0" * self.block_size

            body = {
                'title': self.idx_to_name(idx),
                'mimeType': self.BLOCK_MIMETYPE,
                'parents': [{'id': self.data_dir}],
            }
            media_body = MediaInMemoryUpload(data, mimetype=self.BLOCK_MIMETYPE, resumable=False)

            result = self.drive.files().insert(body=body, media_body=media_body).execute()
            self.mapping[idx] = result['id']
            return result

    def sync_io(self, idx, data, pri):

        ret = []
        sem = Semaphore(0)
        def mycb(*param):
            ret.append(param)
            sem.release()

        self.que.put((idx, data, mycb), pri)
        sem.acquire()

        err, data = ret.pop()
        if err:
            raise err
        else:
            return data