Exemplo n.º 1
0
 def _cleanUpExternalStore(self, containerName):
     from toil.jobStores.azureJobStore import _fetchAzureAccountKey
     from azure.storage.blob import BlobService
     blobService = BlobService(account_key=_fetchAzureAccountKey(
         self.accountName),
                               account_name=self.accountName)
     blobService.delete_container(containerName)
Exemplo n.º 2
0
def apathetic_container_delete(container_name, *args, **kwargs):
    conn = BlobService(*args, **kwargs)
    conn.delete_container(container_name)

    return conn
Exemplo n.º 3
0
def module_impl(rm, log, params, check_mode=False):

    if not HAS_AZURE:
        raise Exception("The Azure python sdk is not installed (try 'pip install azure')")

    if not HAS_REQUESTS:
        raise Exception("The requests python module is not installed (try 'pip install requests')")

    resource_group = params.get('resource_group')
    account_name = params.get('account_name')
    container_name = params.get('container_name')
    mode = params.get('mode')
    x_ms_meta_name_values = params.get('x_ms_meta_name_values')
    x_ms_blob_public_access = params.get('x_ms_blob_public_access')
    x_ms_blob_cache_control = params.get('x_ms_blob_cache_control')
    x_ms_blob_content_encoding = params.get('x_ms_blob_content_encoding')
    x_ms_blob_content_language = params.get('x_ms_blob_content_language')
    x_ms_blob_content_type = params.get('x_ms_blob_content_type')
    prefix = params.get('prefix')
    marker = params.get('marker')
    max_results = params.get('max_results')
    blob_name = params.get('blob_name')
    file_path = params.get('file_path')
    overwrite = params.get('overwrite')
    permissions = params.get('permissions')
    hours = params.get('hours')
    days = params.get('days')
    access_token = params.get('access_token')

    results = dict(changed=False)

    storage_client = rm.storage_client
    
    if not resource_group:
        raise Exception("Parameter error: resource_group cannot be None.")
    
    if not account_name:
        raise Exception("Parameter error: account_name cannot be None.")

    if not container_name:
        raise Exception("Parameter error: container_name cannot be None.")

    if not NAME_PATTERN.match(container_name):
        raise Exception("Parameter error: container_name must consist of lowercase letters, numbers and hyphens. It must begin with " +
            "a letter or number. It may not contain two consecutive hyphens.")

    # add file path validation

    results['account_name'] = account_name
    results['resource_group'] = resource_group 
    results['container_name'] = container_name

    # put (upload), get (download), geturl (return download url (Ansible 1.3+), getstr (download object as string (1.3+)), list (list keys (2.0+)), create (bucket), delete (bucket), and delobj (delete object)
    try:
        log('Getting keys')
        keys = {}
        response = storage_client.storage_accounts.list_keys(resource_group, account_name)
        keys[KeyName.key1] = response.storage_account_keys.key1
        keys[KeyName.key2] = response.storage_account_keys.key2
    except AzureHttpError as e:
        log('Error getting keys for account %s' % account_name)
        raise Exception(str(e.message))

    try:
        log('Create blob service')
        bs = BlobService(account_name, keys[KeyName.key1])
    except Exception as e:
        log('Error creating blob service.')
        raise Exception(str(e.args[0]))

    if mode == 'create':
        container = get_container_facts(bs, container_name)
        if container is not None:
            # container exists
            results['container'] = container
            results['msg'] = "Container already exists."
            return results
        # create the container
        if not check_mode:
            log('Create container %s' % container_name)
            bs.create_container(container_name, x_ms_meta_name_values, x_ms_blob_public_access)
            results['container'] = get_container_facts(bs, container_name)
        results['msg'] = "Container created successfully."
        results['changed'] = True
        return results

    if mode == 'update':
        container = get_container_facts(bs, container_name)
        if container is None:
            # container does not exist
            if not check_mode:
                log('Create container %s' % container_name)
                bs.create_container(container_name, x_ms_meta_name_values, x_ms_blob_public_access)
            results['changed'] = True
            results['msg'] = 'Container created successfully.'
            return results     
        # update existing container
        results['msg'] = "Container not changed."
        if x_ms_meta_name_values:
            if not check_mode:
                log('Update x_ms_meta_name_values for container %s' % container_name)
                bs.set_container_metadata(container_name, x_ms_meta_name_values)
            results['changed'] = True
            results['msg'] = 'Container meta data updated successfully.'
        if x_ms_blob_public_access:
            access = x_ms_blob_public_access
            if x_ms_blob_public_access == 'private':
                access = None
            if not check_mode:
                log('Set access to %s for container %s' % (access, container_name))
                bs.set_container_acl(container_name=container_name, x_ms_blob_public_access=access)
            results['changed'] = True
            results['msg'] = 'Container ACL updated successfully.'
        if permissions:
            if hours == 0 and days == 0:
                raise Exception("Parameter error: expecting hours > 0 or days > 0")
            id = "%s-%s" % (container_name, permissions) 
            si = get_identifier(id, hours, days, permissions)
            identifiers = SignedIdentifiers()
            identifiers.signed_identifiers.append(si)
            if not check_mode:
                log('Set permissions to %s for container %s' % (permissions, container_name))
                bs.set_container_acl(container_name=container_name,signed_identifiers=identifiers)
            results['changed'] = True
            results['msg'] = 'Container ACL updated successfully.'
        results['container'] = get_container_facts(bs, container_name)
        return results

    if mode == 'delete':
        container = get_container_facts(bs, container_name)
        if container is None:
            results['msg'] = "Container %s could not be found." % container_name
            return results
        if not check_mode:
            log('Deleting container %s' % container_name)
            bs.delete_container(container_name)
        results['changed'] = True
        results['msg'] = 'Container deleted successfully.'
        return results

    if mode == 'delete_blob':
        if blob_name is None:
            raise Exception("Parameter error: blob_name cannot be None.")
        
        container = container_check(bs, container_name)
        blob = get_blob_facts(bs, container_name, blob_name)

        if not blob:
            results['msg'] = 'Blob %s could not be found in container %s.' % (blob_name, container_name)
            return results

        if not check_mode:
            log('Deleteing %s from container %s.' % (blob_name, container_name))
            bs.delete_blob(container_name, blob_name)
        results['changed'] = True
        results['msg'] = 'Blob successfully deleted.'
        return results

    if mode == 'put':
        if not blob_name:
            raise Exception("Parameter error: blob_name cannot be None.")

        if not file_path :
            raise Exception("Parameter error: file_path cannot be None.")

        if not path_check(file_path):
            raise Exception("File %s does not exist." % file_path)

        container = get_container_facts(bs, container_name)
        blob = None
        if container is not None:
            blob = get_blob_facts(bs, container_name, blob_name)

        if container is not None and blob is not None:
            # both container and blob already exist
            md5_remote = blob['content-md5']
            md5_local = get_md5(file_path)
            results['container'] = container
            results['blob'] = blob

            if md5_local == md5_remote:
                sum_matches = True
                results['msg'] = 'File checksums match. File not uploaded.'
                if overwrite == 'always':
                    if not check_mode:
                        log('Uploading %s to container %s.' % (file_path, container_name))
                        put_block_blob(
                            bs,
                            container_name,
                            blob_name,
                            file_path,
                            x_ms_meta_name_values,
                            x_ms_blob_cache_control,
                            x_ms_blob_content_encoding,
                            x_ms_blob_content_language,
                            x_ms_blob_content_type
                        )
                        results['blob'] = get_blob_facts(bs, container_name, blob_name)
                    results['changed'] = True
                    results['msg'] = 'File successfully uploaded.'
            else:
                sum_matches = False
                if overwrite in ('always', 'different'):
                    if not check_mode:
                        log('Uploading %s to container %s.' % (file_path, container_name))
                        put_block_blob(
                            bs,
                            container_name,
                            blob_name,
                            file_path,
                            x_ms_meta_name_values,
                            x_ms_blob_cache_control,
                            x_ms_blob_content_encoding,
                            x_ms_blob_content_language,
                            x_ms_blob_content_type
                        )
                        results['blob'] = get_blob_facts(bs, container_name, blob_name)
                    results['changed'] = True
                    results['msg'] = 'File successfully uploaded.'
                else:
                    results['msg'] = "WARNING: Checksums do not match. Use overwrite parameter to force upload."
            return results

        if container is None:
            # container does not exist. create container and upload.
            if not check_mode:
                log('Creating container %s.' % container_name)
                bs.create_container(container_name, x_ms_meta_name_values, x_ms_blob_public_access)
                log('Uploading %s to container %s.' % (file_path, container_name))
                put_block_blob(
                    bs,
                    container_name,
                    blob_name,
                    file_path,
                    x_ms_meta_name_values,
                    x_ms_blob_cache_control,
                    x_ms_blob_content_encoding,
                    x_ms_blob_content_language,
                    x_ms_blob_content_type
                )
                results['conainer'] = get_container_facts(bs, container_name)
                results['blob'] = get_blob_facts(bs, container_name, blob_name)
            results['changed'] = True
            results['msg'] = 'Successfully created container and uploaded file.'
            return results

        if container is not None:
            # container exists. just upload.
            if not check_mode:
                log('Uploading %s to container %s.' % (file_path, container_name))
                put_block_blob(
                    bs,
                    container_name,
                    blob_name,
                    file_path,
                    x_ms_meta_name_values,
                    x_ms_blob_cache_control,
                    x_ms_blob_content_encoding,
                    x_ms_blob_content_language,
                    x_ms_blob_content_type
                )
                results['blob'] = get_blob_facts(bs, container_name, blob_name)
            results['changed'] = True
            results['msg'] = 'Successfully updloaded file.'
            return results

    if mode == 'list':
        container = container_check(bs, container_name)
        response = bs.list_blobs(
            container_name,
            prefix,
            marker,
            max_results
        )
        results['blobs'] = []
        for blob in response.blobs:
            b = dict(
                name = blob.name,
                snapshot = blob.snapshot,
                last_modified = blob.properties.last_modified,
                content_length = blob.properties.content_length,
                blob_type = blob.properties.blob_type,
            )
            results['blobs'].append(b)
        return results

    if mode == 'get':
        if file_path is None:
            raise Exception("Parameter error: file_path cannot be None.")
        
        container = container_check(bs, container_name)
        blob = blob_check(bs, container_name, blob_name)
        path_exists = path_check(file_path)
        
        if not path_exists or overwrite == 'always':
            if not check_mode:
                bs.get_blob_to_path(container_name, blob_name, file_path)
            results['changed'] = True
            results['msg'] = "Blob %s successfully downloaded to %s." % (blob_name, file_path)
            return results

        if path_exists:
            md5_remote = blob['content-md5']
            md5_local = get_md5(file_path)

            if md5_local == md5_remote:
                sum_matches = True
                if overwrite == 'always':
                    if not check_mode:
                        bs.get_blob_to_path(container_name, blob_name, file_path)
                    results['changed'] = True
                    results['msg'] = "Blob %s successfully downloaded to %s." % (blob_name, file_path)
                else:
                    results['msg'] = "Local and remote object are identical, ignoring. Use overwrite parameter to force."
            else:
                sum_matches = False
                if overwrite in ('always', 'different'):
                    if not check_mode:
                        bs.get_blob_to_path(container_name, blob_name, file_path)
                    results['changed'] = True
                    results['msg'] = "Blob %s successfully downloaded to %s." % (blob_name, file_path)
                else:
                    results['msg'] ="WARNING: Checksums do not match. Use overwrite parameter to force download."
        
        if sum_matches is True and overwrite == 'never':
            results['msg'] = "Local and remote object are identical, ignoring. Use overwrite parameter to force."
        
        return results

    if mode == 'get_url':
        if not blob_name:
            raise Exception("Parameter error: blob_name cannot be None.")

        container = container_check(bs, container_name)
        blob = blob_check(bs, container_name, blob_name)

        url = bs.make_blob_url(
            container_name=container_name,
            blob_name=blob_name,
            sas_token=access_token)
        results['url'] = url
        results['msg'] = "Url: %s" % url
        return results

    if mode == 'get_token':
        if hours == 0 and days == 0:
            raise Exception("Parameter error: expecting hours > 0 or days > 0")
        container = container_check(bs, container_name)
        blob = blob_check(bs, container_name, blob_name)
        results['blob_name'] = blob_name
        sap = get_shared_access_policy(permissions, hours=hours, days=days)
        token = bs.generate_shared_access_signature(container_name, blob_name, sap)
        results['access_token'] = token
        return results
Exemplo n.º 4
0
 def _cleanUpExternalStore(self, containerName):
     from toil.jobStores.azureJobStore import _fetchAzureAccountKey
     from azure.storage.blob import BlobService
     blobService = BlobService(account_key=_fetchAzureAccountKey(self.accountName),
                               account_name=self.accountName)
     blobService.delete_container(containerName)
Exemplo n.º 5
0
class AzureFS(LoggingMixIn, Operations):
    """Azure Blob Storage filesystem"""

    blobs = None
    containers = dict()  # <cname, dict(stat:dict,
                                    #files:None|dict<fname, stat>)
    fds = dict()  # <fd, (path, bytes, dirty)>
    fd = 0

    def __init__(self, account, key):
        self.blobs = BlobService(account, key)
        self.rebuild_container_list()

    def convert_to_epoch(self, date):
        """Converts Tue, 31 Jul 2012 07:17:34 GMT format to epoch"""
        return int(time.mktime(time.strptime(date, TIME_FORMAT)))

    def rebuild_container_list(self):
        cmap = dict()
        cnames = set()
        for c in self.blobs.list_containers():
            date = c.properties.last_modified
            cstat = dict(st_mode=(S_IFDIR | 0755), st_uid=getuid(), st_size=0,
                         st_mtime=self.convert_to_epoch(date))
            cname = c.name
            cmap['/' + cname] = dict(stat=cstat, files=None)
            cnames.add(cname)

        cmap['/'] = dict(files={},
                         stat=dict(st_mode=(S_IFDIR | 0755),
                                     st_uid=getuid(), st_size=0,
                                     st_mtime=int(time.time())))

        self.containers = cmap   # destroys fs tree cache resistant to misses

    def _parse_path(self, path):    # returns </dir, file(=None)>
        if path.count('/') > 1:     # file
            return str(path[:path.rfind('/')]), str(path[path.rfind('/') + 1:])
        else:                       # dir
            pos = path.rfind('/', 1)
            if pos == -1:
                return path, None
            else:
                return str(path[:pos]), None

    def parse_container(self, path):
        base_container = path[1:]   # /abc/def/g --> abc
        if base_container.find('/') > -1:
            base_container = base_container[:base_container.find('/')]
        return str(base_container)

    def _get_dir(self, path, contents_required=False):
        if not self.containers:
            self.rebuild_container_list()

        if path in self.containers and not (contents_required and \
                self.containers[path]['files'] is None):
            return self.containers[path]

        cname = self.parse_container(path)

        if '/' + cname not in self.containers:
            raise FuseOSError(ENOENT)
        else:
            if self.containers['/' + cname]['files'] is None:
                # fetch contents of container
                log.info("------> CONTENTS NOT FOUND: %s" % cname)

                blobs = self.blobs.list_blobs(cname)

                dirstat = dict(st_mode=(S_IFDIR | 0755), st_size=0,
                               st_uid=getuid(), st_mtime=time.time())

                if self.containers['/' + cname]['files'] is None:
                    self.containers['/' + cname]['files'] = dict()

                for f in blobs:
                    blob_name = f.name
                    blob_date = f.properties.last_modified
                    blob_size = long(f.properties.content_length)

                    node = dict(st_mode=(S_IFREG | 0644), st_size=blob_size,
                                st_mtime=self.convert_to_epoch(blob_date),
                                st_uid=getuid())

                    if blob_name.find('/') == -1:  # file just under container
                        self.containers['/' + cname]['files'][blob_name] = node

            return self.containers['/' + cname]
        return None

    def _get_file(self, path):
        d, f = self._parse_path(path)
        dir = self._get_dir(d, True)
        if dir is not None and f in dir['files']:
            return dir['files'][f]

    def getattr(self, path, fh=None):
        d, f = self._parse_path(path)

        if f is None:
            dir = self._get_dir(d)
            return dir['stat']
        else:
            file = self._get_file(path)

            if file:
                return file

        raise FuseOSError(ENOENT)

    # FUSE
    def mkdir(self, path, mode):
        if path.count('/') <= 1:    # create on root
            name = path[1:]

            if not 3 <= len(name) <= 63:
                log.error("Container names can be 3 through 63 chars long.")
                raise FuseOSError(ENAMETOOLONG)
            if name is not name.lower():
                log.error("Container names cannot contain uppercase \
                        characters.")
                raise FuseOSError(EACCES)
            if name.count('--') > 0:
                log.error('Container names cannot contain consecutive \
                        dashes (-).')
                raise FuseOSError(EAGAIN)
            #TODO handle all "-"s must be preceded by letter or numbers
            #TODO starts with only letter or number, can contain letter, nr,'-'

            resp = self.blobs.create_container(name)

            if resp:
                self.rebuild_container_list()
                log.info("CONTAINER %s CREATED" % name)
            else:
                raise FuseOSError(EACCES)
                log.error("Invalid container name or container already \
                        exists.")
        else:
            raise FuseOSError(ENOSYS)  # TODO support 2nd+ level mkdirs

    def rmdir(self, path):
        if path.count('/') == 1:
            c_name = path[1:]
            resp = self.blobs.delete_container(c_name)

            if resp:
                if path in self.containers:
                    del self.containers[path]
            else:
                raise FuseOSError(EACCES)
        else:
            raise FuseOSError(ENOSYS)  # TODO support 2nd+ level mkdirs

    def create(self, path, mode):
        node = dict(st_mode=(S_IFREG | mode), st_size=0, st_nlink=1,
                     st_uid=getuid(), st_mtime=time.time())
        d, f = self._parse_path(path)

        if not f:
            log.error("Cannot create files on root level: /")
            raise FuseOSError(ENOSYS)

        dir = self._get_dir(d, True)
        if not dir:
            raise FuseOSError(EIO)
        dir['files'][f] = node

        return self.open(path, data='')     # reusing handler provider

    def open(self, path, flags=0, data=None):
        if data == None:                    # download contents
            c_name = self.parse_container(path)
            f_name = path[path.find('/', 1) + 1:]

            try:
                data = self.blobs.get_blob(c_name, f_name)
            except AzureMissingResourceHttpError:
                dir = self._get_dir('/' + c_name, True)
                if f_name in dir['files']:
                    del dir['files'][f_name]
                raise FuseOSError(ENOENT)
            except AzureException as e:
                log.error("Read blob failed HTTP %d" % e.code)
                raise FuseOSError(EAGAIN)

        self.fd += 1
        self.fds[self.fd] = (path, data, False)

        return self.fd

    def flush(self, path, fh=None):
        if not fh:
            raise FuseOSError(EIO)
        else:
            if fh not in self.fds:
                raise FuseOSError(EIO)
            path = self.fds[fh][0]
            data = self.fds[fh][1]
            dirty = self.fds[fh][2]

            if not dirty:
                return 0     # avoid redundant write

            d, f = self._parse_path(path)
            c_name = self.parse_container(path)

            if data is None:
                data = ''

            try:
                if len(data) < 64 * 1024 * 1024:   # 64 mb
                    self.blobs.put_blob(c_name, f, data, 'BlockBlob')
                else:
                    # divide file by blocks and upload
                    block_size = 8 * 1024 * 1024
                    num_blocks = int(math.ceil(len(data) * 1.0 / block_size))
                    rd = str(random.randint(1, 1e8))
                    block_ids = list()

                    for i in range(num_blocks):
                        part = data[i * block_size:min((i + 1) * block_size,
                            len(data))]
                        block_id = base64.encodestring('%s_%s' % (rd,
                            (8 - len(str(i))) * '0' + str(i)))
                        self.blobs.put_block(c_name, f, part, block_id)
                        block_ids.append(block_id)

                    self.blobs.put_block_list(c_name, f, block_ids)
            except AzureException:
                raise FuseOSError(EAGAIN)

            dir = self._get_dir(d, True)
            if not dir or f not in dir['files']:
                raise FuseOSError(EIO)

            # update local data
            dir['files'][f]['st_size'] = len(data)
            dir['files'][f]['st_mtime'] = time.time()
            self.fds[fh] = (path, data, False)  # mark as not dirty
            return 0

    def release(self, path, fh=None):
        if fh is not None and fh in self.fds:
            del self.fds[fh]

    def truncate(self, path, length, fh=None):
        return 0     # assume done, no need

    def write(self, path, data, offset, fh=None):
        if not fh or fh not in self.fds:
            raise FuseOSError(ENOENT)
        else:
            d = self.fds[fh][1]
            if d is None:
                d = ""
            self.fds[fh] = (self.fds[fh][0], d[:offset] + data, True)
            return len(data)

    def unlink(self, path):
        c_name = self.parse_container(path)
        d, f = self._parse_path(path)

        try:
            self.blobs.delete_blob(c_name, f)

            _dir = self._get_dir(path, True)
            if _dir and f in _dir['files']:
                del _dir['files'][f]
            return 0
        except AzureMissingResourceHttpError:
            raise FuseOSError(ENOENT)
        except Exception as e:
            raise FuseOSError(EAGAIN)

    def readdir(self, path, fh):
        if path == '/':
            return ['.', '..'] + [x[1:] for x in self.containers.keys() \
                    if x is not '/']

        dir = self._get_dir(path, True)
        if not dir:
            raise FuseOSError(ENOENT)
        return ['.', '..'] + dir['files'].keys()

    def read(self, path, size, offset, fh):
        if not fh or fh not in self.fds:
            raise FuseOSError(ENOENT)

        f_name = path[path.find('/', 1) + 1:]
        c_name = path[1:path.find('/', 1)]

        try:
            data = self.blobs.get_blob(c_name, f_name)
            self.fds[fh] = (self.fds[fh][0], data, False)
            return data[offset:offset + size]
        except URLError, e:
            if e.code == 404:
                raise FuseOSError(ENOENT)
            elif e.code == 403:
                raise FUSEOSError(EPERM)
            else:
                log.error("Read blob failed HTTP %d" % e.code)
                raise FuseOSError(EAGAIN)
        data = self.fds[fh][1]
        if data is None:
            data = ""
        return data[offset:offset + size]
Exemplo n.º 6
0
class AzureFS(LoggingMixIn, Operations):
    """Azure Blob Storage filesystem"""

    blobs = None
    containers = dict()  # <cname, dict(stat:dict,
    #files:None|dict<fname, stat>)
    fds = dict()  # <fd, (path, bytes, dirty)>
    fd = 0

    def __init__(self, account, key):
        self.blobs = BlobService(account, key)
        self.rebuild_container_list()

    def convert_to_epoch(self, date):
        """Converts Tue, 31 Jul 2012 07:17:34 GMT format to epoch"""
        return int(time.mktime(time.strptime(date, TIME_FORMAT)))

    def rebuild_container_list(self):
        cmap = dict()
        cnames = set()
        for c in self.blobs.list_containers():
            date = c.properties.last_modified
            cstat = dict(st_mode=(S_IFDIR | 0755),
                         st_uid=getuid(),
                         st_size=0,
                         st_mtime=self.convert_to_epoch(date))
            cname = c.name
            cmap['/' + cname] = dict(stat=cstat, files=None)
            cnames.add(cname)

        cmap['/'] = dict(files={},
                         stat=dict(st_mode=(S_IFDIR | 0755),
                                   st_uid=getuid(),
                                   st_size=0,
                                   st_mtime=int(time.time())))

        self.containers = cmap  # destroys fs tree cache resistant to misses

    def _parse_path(self, path):  # returns </dir, file(=None)>
        if path.count('/') > 1:  # file
            return str(path[:path.rfind('/')]), str(path[path.rfind('/') + 1:])
        else:  # dir
            pos = path.rfind('/', 1)
            if pos == -1:
                return path, None
            else:
                return str(path[:pos]), None

    def parse_container(self, path):
        base_container = path[1:]  # /abc/def/g --> abc
        if base_container.find('/') > -1:
            base_container = base_container[:base_container.find('/')]
        return str(base_container)

    def _get_dir(self, path, contents_required=False):
        if not self.containers:
            self.rebuild_container_list()

        if path in self.containers and not (contents_required and \
                self.containers[path]['files'] is None):
            return self.containers[path]

        cname = self.parse_container(path)

        if '/' + cname not in self.containers:
            raise FuseOSError(ENOENT)
        else:
            if self.containers['/' + cname]['files'] is None:
                # fetch contents of container
                log.info("------> CONTENTS NOT FOUND: %s" % cname)

                blobs = self.blobs.list_blobs(cname)

                dirstat = dict(st_mode=(S_IFDIR | 0755),
                               st_size=0,
                               st_uid=getuid(),
                               st_mtime=time.time())

                if self.containers['/' + cname]['files'] is None:
                    self.containers['/' + cname]['files'] = dict()

                for f in blobs:
                    blob_name = f.name
                    blob_date = f.properties.last_modified
                    blob_size = long(f.properties.content_length)

                    node = dict(st_mode=(S_IFREG | 0644),
                                st_size=blob_size,
                                st_mtime=self.convert_to_epoch(blob_date),
                                st_uid=getuid())

                    if blob_name.find('/') == -1:  # file just under container
                        self.containers['/' + cname]['files'][blob_name] = node

            return self.containers['/' + cname]
        return None

    def _get_file(self, path):
        d, f = self._parse_path(path)
        dir = self._get_dir(d, True)
        if dir is not None and f in dir['files']:
            return dir['files'][f]

    def getattr(self, path, fh=None):
        d, f = self._parse_path(path)

        if f is None:
            dir = self._get_dir(d)
            return dir['stat']
        else:
            file = self._get_file(path)

            if file:
                return file

        raise FuseOSError(ENOENT)

    # FUSE
    def mkdir(self, path, mode):
        if path.count('/') <= 1:  # create on root
            name = path[1:]

            if not 3 <= len(name) <= 63:
                log.error("Container names can be 3 through 63 chars long.")
                raise FuseOSError(ENAMETOOLONG)
            if name is not name.lower():
                log.error("Container names cannot contain uppercase \
                        characters.")
                raise FuseOSError(EACCES)
            if name.count('--') > 0:
                log.error('Container names cannot contain consecutive \
                        dashes (-).')
                raise FuseOSError(EAGAIN)
            #TODO handle all "-"s must be preceded by letter or numbers
            #TODO starts with only letter or number, can contain letter, nr,'-'

            resp = self.blobs.create_container(name)

            if resp:
                self.rebuild_container_list()
                log.info("CONTAINER %s CREATED" % name)
            else:
                raise FuseOSError(EACCES)
                log.error("Invalid container name or container already \
                        exists.")
        else:
            raise FuseOSError(ENOSYS)  # TODO support 2nd+ level mkdirs

    def rmdir(self, path):
        if path.count('/') == 1:
            c_name = path[1:]
            resp = self.blobs.delete_container(c_name)

            if resp:
                if path in self.containers:
                    del self.containers[path]
            else:
                raise FuseOSError(EACCES)
        else:
            raise FuseOSError(ENOSYS)  # TODO support 2nd+ level mkdirs

    def create(self, path, mode):
        node = dict(st_mode=(S_IFREG | mode),
                    st_size=0,
                    st_nlink=1,
                    st_uid=getuid(),
                    st_mtime=time.time())
        d, f = self._parse_path(path)

        if not f:
            log.error("Cannot create files on root level: /")
            raise FuseOSError(ENOSYS)

        dir = self._get_dir(d, True)
        if not dir:
            raise FuseOSError(EIO)
        dir['files'][f] = node

        return self.open(path, data='')  # reusing handler provider

    def open(self, path, flags=0, data=None):
        if data == None:  # download contents
            c_name = self.parse_container(path)
            f_name = path[path.find('/', 1) + 1:]

            try:
                data = self.blobs.get_blob(c_name, f_name)
            except AzureMissingResourceHttpError:
                dir = self._get_dir('/' + c_name, True)
                if f_name in dir['files']:
                    del dir['files'][f_name]
                raise FuseOSError(ENOENT)
            except AzureException as e:
                log.error("Read blob failed HTTP %d" % e.code)
                raise FuseOSError(EAGAIN)

        self.fd += 1
        self.fds[self.fd] = (path, data, False)

        return self.fd

    def flush(self, path, fh=None):
        if not fh:
            raise FuseOSError(EIO)
        else:
            if fh not in self.fds:
                raise FuseOSError(EIO)
            path = self.fds[fh][0]
            data = self.fds[fh][1]
            dirty = self.fds[fh][2]

            if not dirty:
                return 0  # avoid redundant write

            d, f = self._parse_path(path)
            c_name = self.parse_container(path)

            if data is None:
                data = ''

            try:
                if len(data) < 64 * 1024 * 1024:  # 64 mb
                    self.blobs.put_blob(c_name, f, data, 'BlockBlob')
                else:
                    # divide file by blocks and upload
                    block_size = 8 * 1024 * 1024
                    num_blocks = int(math.ceil(len(data) * 1.0 / block_size))
                    rd = str(random.randint(1, 1e8))
                    block_ids = list()

                    for i in range(num_blocks):
                        part = data[i * block_size:min((i + 1) *
                                                       block_size, len(data))]
                        block_id = base64.encodestring(
                            '%s_%s' % (rd, (8 - len(str(i))) * '0' + str(i)))
                        self.blobs.put_block(c_name, f, part, block_id)
                        block_ids.append(block_id)

                    self.blobs.put_block_list(c_name, f, block_ids)
            except AzureException:
                raise FuseOSError(EAGAIN)

            dir = self._get_dir(d, True)
            if not dir or f not in dir['files']:
                raise FuseOSError(EIO)

            # update local data
            dir['files'][f]['st_size'] = len(data)
            dir['files'][f]['st_mtime'] = time.time()
            self.fds[fh] = (path, data, False)  # mark as not dirty
            return 0

    def release(self, path, fh=None):
        if fh is not None and fh in self.fds:
            del self.fds[fh]

    def truncate(self, path, length, fh=None):
        return 0  # assume done, no need

    def write(self, path, data, offset, fh=None):
        if not fh or fh not in self.fds:
            raise FuseOSError(ENOENT)
        else:
            d = self.fds[fh][1]
            if d is None:
                d = ""
            self.fds[fh] = (self.fds[fh][0], d[:offset] + data, True)
            return len(data)

    def unlink(self, path):
        c_name = self.parse_container(path)
        d, f = self._parse_path(path)

        try:
            self.blobs.delete_blob(c_name, f)

            _dir = self._get_dir(path, True)
            if _dir and f in _dir['files']:
                del _dir['files'][f]
            return 0
        except AzureMissingResourceHttpError:
            raise FuseOSError(ENOENT)
        except Exception as e:
            raise FuseOSError(EAGAIN)

    def readdir(self, path, fh):
        if path == '/':
            return ['.', '..'] + [x[1:] for x in self.containers.keys() \
                    if x is not '/']

        dir = self._get_dir(path, True)
        if not dir:
            raise FuseOSError(ENOENT)
        return ['.', '..'] + dir['files'].keys()

    def read(self, path, size, offset, fh):
        if not fh or fh not in self.fds:
            raise FuseOSError(ENOENT)

        f_name = path[path.find('/', 1) + 1:]
        c_name = path[1:path.find('/', 1)]

        try:
            data = self.blobs.get_blob(c_name, f_name)
            self.fds[fh] = (self.fds[fh][0], data, False)
            return data[offset:offset + size]
        except URLError, e:
            if e.code == 404:
                raise FuseOSError(ENOENT)
            elif e.code == 403:
                raise FUSEOSError(EPERM)
            else:
                log.error("Read blob failed HTTP %d" % e.code)
                raise FuseOSError(EAGAIN)
        data = self.fds[fh][1]
        if data is None:
            data = ""
        return data[offset:offset + size]
Exemplo n.º 7
0
class AzureFS(LoggingMixIn, Operations):
    """
    Azure Blob Storage filesystem
    """

    blobs = None
    containers = dict()  # {cname: {stat:dict, files:None|{fname: stat}}
    fd = 0

    def __init__(self, account, key):
        self.blobs = BlobService(account, key)
        self._rebuild_container_list()

    def _rebuild_container_list(self):
        cmap = dict()
        cnames = set()

        for c in self.blobs.list_containers():
            cstat = make_stat(stat.S_IFDIR | 0755, c.properties)

            cname = c.name
            cmap['/' + cname] = dict(stat=cstat, files=None)
            cnames.add(cname)

        cmap['/'] = dict(files={}, stat=make_stat(stat.S_IFDIR | 0755))
        self.containers = cmap  # destroys fs tree cache resistant to misses

    @staticmethod
    def _parse_path(path):  # returns </dir, file(=None)>
        if path.count('/') > 1:  # file
            return str(path[:path.rfind('/')]), str(path[path.rfind('/') + 1:])
        else:  # dir
            pos = path.rfind('/', 1)
            if pos == -1:
                return path, None
            else:
                return str(path[:pos]), None

    @staticmethod
    def _parse_container(path):
        base_container = path[1:]  # /abc/def/g --> abc
        if base_container.find('/') > -1:
            base_container = base_container[:base_container.find('/')]
        return str(base_container)

    def _get_dir(self, path, contents_required=False, force=False):
        log.debug("get_dir: contents_required=%s, force=%s,"
                  " has_container=%s, path=%s",
                  "t" if contents_required else "f",
                  "t" if force else "f",
                  "t" if path in self.containers else "f",
                  path)
        cname = self._parse_container(path)

        if 'process' in self.containers['/' + cname] and \
                self.containers['/' + cname]['process'] is not None:
            p = self.containers['/' + cname]['process']
            if not p.is_alive():
                p.join()
                self.containers['/' + cname]['process'] = None

        if not self.containers:
            log.info("get_dir: rebuilding container list")
            self._rebuild_container_list()

        if path in self.containers:
            container = self.containers[path]
            if not contents_required:
                return container
            if not force and container['files'] is not None:
                return container

        if '/' + cname not in self.containers:
            log.info("get_dir: no such container: /%s", cname)
            raise FuseOSError(errno.ENOENT)
        else:
            container = self.containers['/' + cname]
            try:
                log.info(">>>> %s - %s ",
                         cname,
                         container['process'])
            except KeyError:
                log.info(">>>> no process: %s " % cname)
            if container['files'] is None or force is True:
                # fetch contents of container
                log.info("Contents not found in the cache index: %s", cname)

                process = container.get('process', None)
                if process is not None and process.is_alive():
                    # We do nothing. Some thread is still working,
                    # getting list of blobs from the container.
                    log.info("Fetching blob list for '%s' is already"
                             " handled by %s", cname, process)
                else:
                    # No thread running for this container, launch a new one
                    m = Manager()
                    files = m.dict()
                    process = Process(target=get_files_from_blob_service,
                                      args=(self.blobs, cname, files),
                                      name="list-blobs/%s" % cname)
                    process.daemon = True
                    process.start()
                    container['process'] = process
                    log.info("Started blob list retrieval for '%s': %s",
                             cname, process)
                    container['files'] = files
            return container

    def _get_file(self, path):
        d, f = self._parse_path(path)
        log.debug("get_file: requested path=%s (d=%s, f=%s)", path, d, f)
        directory = self._get_dir(d, True)
        files = None
        if directory is not None:
            files = directory['files']
            if f in files:
                return files[f]

        if not hasattr(self, "_get_file_noent"):
            self._get_file_noent = {}

        last_check = self._get_file_noent.get(path, 0)
        if time.time() - last_check <= 30:
            # Negative TTL is 30 seconds (hardcoded for now)
            log.info("get_file: cache says to reply negative for %s", path)
            return None

        # Check if file now exists and our caches are just stale.
        try:
            c = self._parse_container(d)
            p = path[path.find('/', 1) + 1:]
            props = self.blobs.get_blob_properties(c, p)
            log.info("get_file: found locally unknown remote file %s: %s",
                     path, repr(props))

            node = make_stat(stat.S_IFREG | 0644, props)

            if node['st_size'] > 0:
                log.info("get_file: properties for %s: %s", path, repr(node))
                # Remember this, so we won't have to re-query it.
                files[f] = node
                if path in self._get_file_noent:
                    del self._get_file_noent[path]
                return node
            else:
                # TODO: FIXME: HACK: We currently ignore empty files.
                # Sometimes the file is not yet here and is still uploading.
                # Such files have "content-length: 0". Ignore those for now.
                log.warning("get_file: the file %s is not yet here (size=%s)",
                            path, node['st_size'])
                self._get_file_noent[path] = time.time()
                return None
        except AzureMissingResourceHttpError:
            log.info("get_file: remote confirms non-existence of %s", path)
            self._get_file_noent[path] = time.time()
            return None
        except AzureException as e:
            log.error("get_file: exception while querying remote for %s: %s",
                      path, repr(e))
            self._get_file_noent[path] = time.time()

        return None

    def getattr(self, path, fh=None):
        log.debug("getattr: path=%s", path)
        d, f = self._parse_path(path)

        if f is None:
            return self._get_dir(d)['stat']
        else:
            file_obj = self._get_file(path)
            if file_obj:
                return file_obj

        log.warning("getattr: no such file: %s", path)
        raise FuseOSError(errno.ENOENT)

    def mkdir(self, path, mode):
        if path.count('/') <= 1:  # create on root
            name = path[1:]
            if not 3 <= len(name) <= 63:
                log.error("Container names can be 3 through 63 chars long")
                raise FuseOSError(errno.ENAMETOOLONG)

            if not re.match(RE_CONTAINER_NAME, name):
                log.error("Invalid container name: '%s'", name)
                raise FuseOSError(errno.EACCES)

            resp = self.blobs.create_container(name)
            if resp:
                self._rebuild_container_list()
                log.info("CONTAINER %s CREATED", name)
            else:
                log.error("Invalid container name or container already exists")
                raise FuseOSError(errno.EACCES)
        else:
            # TODO: Support 2nd+ level directory creation
            raise FuseOSError(errno.ENOSYS)

    def rmdir(self, path):
        if path.count('/') == 1:
            c_name = path[1:]
            resp = self.blobs.delete_container(c_name)

            if resp:
                if path in self.containers:
                    del self.containers[path]
            else:
                raise FuseOSError(errno.EACCES)
        else:
            # TODO: Support 2nd+ level directories
            raise FuseOSError(errno.ENOSYS)

    def create(self, path, mode, fi=None):
        node = make_stat(stat.S_IFREG | mode)
        d, f = self._parse_path(path)

        if not f:
            log.error("Cannot create files on root level: /")
            raise FuseOSError(errno.ENOSYS)

        if f == ".__refresh_cache__":
            log.info("Refresh cache forced (%s)" % f)
            self._get_dir(path, True, True)
            return self.open(path, data='')

        directory = self._get_dir(d, True)
        if not directory:
            raise FuseOSError(errno.EIO)
        directory['files'][f] = node

        return self.open(path, data='')  # reusing handler provider

    def open(self, path, flags=0, data=None):
        log.info("open: path=%s; flags=%s", path, flags)
        if data is None:
            # Download contents
            c_name = self._parse_container(path)
            f_name = path[path.find('/', 1) + 1:]

            try:
                self.blobs.get_blob_metadata(c_name, f_name)
            except AzureMissingResourceHttpError:
                directory = self._get_dir('/' + c_name, True)
                if f_name in directory['files']:
                    del directory['files'][f_name]
                log.info("open: remote says there is no such file: c=%s f=%s",
                         c_name, f_name)
                raise FuseOSError(errno.ENOENT)
            except AzureHttpError as e:
                log.error("Read blob failed with HTTP %d", e.status_code)
                raise FuseOSError(errno.EAGAIN)
            except AzureException as e:
                log.exception("Read blob failed with exception: %s", repr(e))
                raise FuseOSError(errno.EAGAIN)
        self.fd += 1
        return self.fd

    def truncate(self, path, length, fh=None):
        return 0  # assume done, no need

    def write(self, path, data, offset, fh=None):
        # TODO: Re-implement writing
        raise FuseOSError(errno.EPERM)

    def unlink(self, path):
        c_name = self._parse_container(path)
        d, f = self._parse_path(path)

        try:
            self.blobs.delete_blob(c_name, f)

            _dir = self._get_dir(path, True)
            if _dir and f in _dir['files']:
                del _dir['files'][f]
            return 0
        except AzureMissingResourceHttpError:
            raise FuseOSError(errno.ENOENT)
        except:
            raise FuseOSError(errno.EAGAIN)

    def readdir(self, path, fh):
        if path == '/':
            return ['.', '..'] + [x[1:] for x in self.containers.keys()
                                  if x != '/']

        directory = self._get_dir(path, True)
        if not directory:
            log.info("readdir: no such file: %s", path)
            raise FuseOSError(errno.ENOENT)
        return ['.', '..'] + directory['files'].keys()

    def read(self, path, size, offset, fh):
        f_name = path[path.find('/', 1) + 1:]
        c_name = path[1:path.find('/', 1)]

        try:
            byte_range = "bytes=%s-%s" % (offset, offset + size - 1)
            log.debug("read range: %s", byte_range)
            data = self.blobs.get_blob(c_name, f_name, snapshot=None,
                                       x_ms_range=byte_range)
            return data
        except AzureHttpError as e:
            if e.status_code == 404:
                raise FuseOSError(errno.ENOENT)
            elif e.status_code == 403:
                raise FuseOSError(errno.EPERM)
            else:
                log.error("Read blob failed with HTTP %d", e.status_code)
                raise FuseOSError(errno.EAGAIN)

    def statfs(self, path):
        return dict(f_bsize=4096, f_blocks=1, f_bavail=sys.maxint)

    def rename(self, old, new):
        # TODO: Implement renaming
        raise FuseOSError(errno.ENOSYS)

    def symlink(self, target, source):
        raise FuseOSError(errno.ENOSYS)

    def getxattr(self, path, name, position=0):
        return ''

    def chmod(self, path, mode):
        pass

    def chown(self, path, uid, gid):
        pass