Example #1
class _BlobStorageTestCase(_TestCase):
    def _get_container_name(self, handler_name):
        container = _get_handler_config_value(handler_name, 'container')
        if container:
            container = container.replace('_', '-').lower()
        return container

    def setUp(self):
        self.service = BlobService(ACCOUNT_NAME, ACCOUNT_KEY)
        # ensure that there's no log file in the container before each test
        containers = [c.name for c in self.service.list_containers()]
        for handler in LOGGING['handlers']:
            container = self._get_container_name(handler)
            if container in containers:
                filename = _get_handler_config_value(handler, 'filename')
                basename = os.path.basename(filename)
                for blob in self.service.list_blobs(container,
                                                    prefix=basename):
                    self.service.delete_blob(container, blob.name)
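Example #1 relies on a LOGGING dict and a _get_handler_config_value helper that are not shown. A minimal sketch of the assumed shapes, inferred from the calls above rather than taken from the original project:

# Hypothetical config and helper; names mirror the calls in Example #1.
LOGGING = {
    'handlers': {
        'blob': {
            'container': 'test_logs',   # the test maps this to 'test-logs'
            'filename': '/tmp/test.log',
        },
    },
}

def _get_handler_config_value(handler_name, key):
    # Return one handler's config value, or None when the key is absent.
    return LOGGING['handlers'].get(handler_name, {}).get(key)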
Example #3
def module_impl(rm, log, params, check_mode=False):

    if not HAS_AZURE:
        raise Exception("The Azure python sdk is not installed (try 'pip install azure')")

    if not HAS_REQUESTS:
        raise Exception("The requests python module is not installed (try 'pip install requests')")

    resource_group = params.get('resource_group')
    account_name = params.get('account_name')
    container_name = params.get('container_name')
    mode = params.get('mode')
    x_ms_meta_name_values = params.get('x_ms_meta_name_values')
    x_ms_blob_public_access = params.get('x_ms_blob_public_access')
    x_ms_blob_cache_control = params.get('x_ms_blob_cache_control')
    x_ms_blob_content_encoding = params.get('x_ms_blob_content_encoding')
    x_ms_blob_content_language = params.get('x_ms_blob_content_language')
    x_ms_blob_content_type = params.get('x_ms_blob_content_type')
    prefix = params.get('prefix')
    marker = params.get('marker')
    max_results = params.get('max_results')
    blob_name = params.get('blob_name')
    file_path = params.get('file_path')
    overwrite = params.get('overwrite')
    permissions = params.get('permissions')
    hours = params.get('hours')
    days = params.get('days')
    access_token = params.get('access_token')

    results = dict(changed=False)

    storage_client = rm.storage_client
    
    if not resource_group:
        raise Exception("Parameter error: resource_group cannot be None.")
    
    if not account_name:
        raise Exception("Parameter error: account_name cannot be None.")

    if not container_name:
        raise Exception("Parameter error: container_name cannot be None.")

    if not NAME_PATTERN.match(container_name):
        raise Exception("Parameter error: container_name must consist of lowercase letters, numbers and hyphens. It must begin with " +
            "a letter or number. It may not contain two consecutive hyphens.")

    # add file path validation

    results['account_name'] = account_name
    results['resource_group'] = resource_group 
    results['container_name'] = container_name

    # Supported modes: create, update, delete (container), delete_blob, put (upload), get (download), list (list blobs), get_url, get_token (SAS)
    try:
        log('Getting keys')
        keys = {}
        response = storage_client.storage_accounts.list_keys(resource_group, account_name)
        keys[KeyName.key1] = response.storage_account_keys.key1
        keys[KeyName.key2] = response.storage_account_keys.key2
    except AzureHttpError as e:
        log('Error getting keys for account %s' % account_name)
        raise Exception(str(e.message))

    try:
        log('Create blob service')
        bs = BlobService(account_name, keys[KeyName.key1])
    except Exception as e:
        log('Error creating blob service.')
        raise Exception(str(e.args[0]))

    if mode == 'create':
        container = get_container_facts(bs, container_name)
        if container is not None:
            # container exists
            results['container'] = container
            results['msg'] = "Container already exists."
            return results
        # create the container
        if not check_mode:
            log('Create container %s' % container_name)
            bs.create_container(container_name, x_ms_meta_name_values, x_ms_blob_public_access)
            results['container'] = get_container_facts(bs, container_name)
        results['msg'] = "Container created successfully."
        results['changed'] = True
        return results

    if mode == 'update':
        container = get_container_facts(bs, container_name)
        if container is None:
            # container does not exist
            if not check_mode:
                log('Create container %s' % container_name)
                bs.create_container(container_name, x_ms_meta_name_values, x_ms_blob_public_access)
            results['changed'] = True
            results['msg'] = 'Container created successfully.'
            return results     
        # update existing container
        results['msg'] = "Container not changed."
        if x_ms_meta_name_values:
            if not check_mode:
                log('Update x_ms_meta_name_values for container %s' % container_name)
                bs.set_container_metadata(container_name, x_ms_meta_name_values)
            results['changed'] = True
            results['msg'] = 'Container meta data updated successfully.'
        if x_ms_blob_public_access:
            access = x_ms_blob_public_access
            if x_ms_blob_public_access == 'private':
                access = None
            if not check_mode:
                log('Set access to %s for container %s' % (access, container_name))
                bs.set_container_acl(container_name=container_name, x_ms_blob_public_access=access)
            results['changed'] = True
            results['msg'] = 'Container ACL updated successfully.'
        if permissions:
            if hours == 0 and days == 0:
                raise Exception("Parameter error: expecting hours > 0 or days > 0")
            id = "%s-%s" % (container_name, permissions) 
            si = get_identifier(id, hours, days, permissions)
            identifiers = SignedIdentifiers()
            identifiers.signed_identifiers.append(si)
            if not check_mode:
                log('Set permissions to %s for container %s' % (permissions, container_name))
                bs.set_container_acl(container_name=container_name, signed_identifiers=identifiers)
            results['changed'] = True
            results['msg'] = 'Container ACL updated successfully.'
        results['container'] = get_container_facts(bs, container_name)
        return results

    if mode == 'delete':
        container = get_container_facts(bs, container_name)
        if container is None:
            results['msg'] = "Container %s could not be found." % container_name
            return results
        if not check_mode:
            log('Deleting container %s' % container_name)
            bs.delete_container(container_name)
        results['changed'] = True
        results['msg'] = 'Container deleted successfully.'
        return results

    if mode == 'delete_blob':
        if blob_name is None:
            raise Exception("Parameter error: blob_name cannot be None.")
        
        container = container_check(bs, container_name)
        blob = get_blob_facts(bs, container_name, blob_name)

        if not blob:
            results['msg'] = 'Blob %s could not be found in container %s.' % (blob_name, container_name)
            return results

        if not check_mode:
            log('Deleting %s from container %s.' % (blob_name, container_name))
            bs.delete_blob(container_name, blob_name)
        results['changed'] = True
        results['msg'] = 'Blob successfully deleted.'
        return results

    if mode == 'put':
        if not blob_name:
            raise Exception("Parameter error: blob_name cannot be None.")

        if not file_path:
            raise Exception("Parameter error: file_path cannot be None.")

        if not path_check(file_path):
            raise Exception("File %s does not exist." % file_path)

        container = get_container_facts(bs, container_name)
        blob = None
        if container is not None:
            blob = get_blob_facts(bs, container_name, blob_name)

        if container is not None and blob is not None:
            # both container and blob already exist
            md5_remote = blob['content-md5']
            md5_local = get_md5(file_path)
            results['container'] = container
            results['blob'] = blob

            if md5_local == md5_remote:
                sum_matches = True
                results['msg'] = 'File checksums match. File not uploaded.'
                if overwrite == 'always':
                    if not check_mode:
                        log('Uploading %s to container %s.' % (file_path, container_name))
                        put_block_blob(
                            bs,
                            container_name,
                            blob_name,
                            file_path,
                            x_ms_meta_name_values,
                            x_ms_blob_cache_control,
                            x_ms_blob_content_encoding,
                            x_ms_blob_content_language,
                            x_ms_blob_content_type
                        )
                        results['blob'] = get_blob_facts(bs, container_name, blob_name)
                    results['changed'] = True
                    results['msg'] = 'File successfully uploaded.'
            else:
                sum_matches = False
                if overwrite in ('always', 'different'):
                    if not check_mode:
                        log('Uploading %s to container %s.' % (file_path, container_name))
                        put_block_blob(
                            bs,
                            container_name,
                            blob_name,
                            file_path,
                            x_ms_meta_name_values,
                            x_ms_blob_cache_control,
                            x_ms_blob_content_encoding,
                            x_ms_blob_content_language,
                            x_ms_blob_content_type
                        )
                        results['blob'] = get_blob_facts(bs, container_name, blob_name)
                    results['changed'] = True
                    results['msg'] = 'File successfully uploaded.'
                else:
                    results['msg'] = "WARNING: Checksums do not match. Use overwrite parameter to force upload."
            return results

        if container is None:
            # container does not exist. create container and upload.
            if not check_mode:
                log('Creating container %s.' % container_name)
                bs.create_container(container_name, x_ms_meta_name_values, x_ms_blob_public_access)
                log('Uploading %s to container %s.' % (file_path, container_name))
                put_block_blob(
                    bs,
                    container_name,
                    blob_name,
                    file_path,
                    x_ms_meta_name_values,
                    x_ms_blob_cache_control,
                    x_ms_blob_content_encoding,
                    x_ms_blob_content_language,
                    x_ms_blob_content_type
                )
                results['container'] = get_container_facts(bs, container_name)
                results['blob'] = get_blob_facts(bs, container_name, blob_name)
            results['changed'] = True
            results['msg'] = 'Successfully created container and uploaded file.'
            return results

        if container is not None:
            # container exists. just upload.
            if not check_mode:
                log('Uploading %s to container %s.' % (file_path, container_name))
                put_block_blob(
                    bs,
                    container_name,
                    blob_name,
                    file_path,
                    x_ms_meta_name_values,
                    x_ms_blob_cache_control,
                    x_ms_blob_content_encoding,
                    x_ms_blob_content_language,
                    x_ms_blob_content_type
                )
                results['blob'] = get_blob_facts(bs, container_name, blob_name)
            results['changed'] = True
            results['msg'] = 'Successfully uploaded file.'
            return results

    if mode == 'list':
        container = container_check(bs, container_name)
        response = bs.list_blobs(
            container_name,
            prefix,
            marker,
            max_results
        )
        results['blobs'] = []
        for blob in response.blobs:
            b = dict(
                name = blob.name,
                snapshot = blob.snapshot,
                last_modified = blob.properties.last_modified,
                content_length = blob.properties.content_length,
                blob_type = blob.properties.blob_type,
            )
            results['blobs'].append(b)
        return results

    if mode == 'get':
        if file_path is None:
            raise Exception("Parameter error: file_path cannot be None.")
        
        container = container_check(bs, container_name)
        blob = blob_check(bs, container_name, blob_name)
        path_exists = path_check(file_path)
        
        if not path_exists or overwrite == 'always':
            if not check_mode:
                bs.get_blob_to_path(container_name, blob_name, file_path)
            results['changed'] = True
            results['msg'] = "Blob %s successfully downloaded to %s." % (blob_name, file_path)
            return results

        if path_exists:
            md5_remote = blob['content-md5']
            md5_local = get_md5(file_path)

            if md5_local == md5_remote:
                sum_matches = True
                if overwrite == 'always':
                    if not check_mode:
                        bs.get_blob_to_path(container_name, blob_name, file_path)
                    results['changed'] = True
                    results['msg'] = "Blob %s successfully downloaded to %s." % (blob_name, file_path)
                else:
                    results['msg'] = "Local and remote object are identical, ignoring. Use overwrite parameter to force."
            else:
                sum_matches = False
                if overwrite in ('always', 'different'):
                    if not check_mode:
                        bs.get_blob_to_path(container_name, blob_name, file_path)
                    results['changed'] = True
                    results['msg'] = "Blob %s successfully downloaded to %s." % (blob_name, file_path)
                else:
                    results['msg'] ="WARNING: Checksums do not match. Use overwrite parameter to force download."
        
        if sum_matches is True and overwrite == 'never':
            results['msg'] = "Local and remote object are identical, ignoring. Use overwrite parameter to force."
        
        return results

    if mode == 'get_url':
        if not blob_name:
            raise Exception("Parameter error: blob_name cannot be None.")

        container = container_check(bs, container_name)
        blob = blob_check(bs, container_name, blob_name)

        url = bs.make_blob_url(
            container_name=container_name,
            blob_name=blob_name,
            sas_token=access_token)
        results['url'] = url
        results['msg'] = "Url: %s" % url
        return results

    if mode == 'get_token':
        if hours == 0 and days == 0:
            raise Exception("Parameter error: expecting hours > 0 or days > 0")
        container = container_check(bs, container_name)
        blob = blob_check(bs, container_name, blob_name)
        results['blob_name'] = blob_name
        sap = get_shared_access_policy(permissions, hours=hours, days=days)
        token = bs.generate_shared_access_signature(container_name, blob_name, sap)
        results['access_token'] = token
        return results
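A hedged sketch of driving module_impl directly; the rm wrapper, log callable, and parameter values are illustrative stand-ins inferred from the function body, not a documented entry point:

# Hypothetical harness: rm must expose .storage_client (an ARM storage client).
params = dict(resource_group='my-rg', account_name='mystorage',
              container_name='my-container', mode='list',
              prefix=None, marker=None, max_results=None)
results = module_impl(rm, log, params, check_mode=True)
print results['container_name']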
Example #4
blob_service = BlobService(account_name="<account_name>", account_key="<account_key>")

blob_service.create_container("datacontainer")

blob_service.create_container("datacontainer", x_ms_blob_public_access="container")

blob_service.set_container_acl("datacontainer", x_ms_blob_public_access="container")


blob_service.put_block_blob_from_path(
    "datacontainer", "datablob", "StorageClientPy.py", x_ms_blob_content_type="text/x-script.phyton"
)


blobs = []
marker = None
while True:
    batch = blob_service.list_blobs("datacontainer", marker=marker)
    blobs.extend(batch)
    if not batch.next_marker:
        break
    marker = batch.next_marker
for blob in blobs:
    print(blob.name)


blob_service.get_blob_to_path("datacontainer", "datablob", "out-StorageClientPy.py")


blob_service.delete_blob("datacontainer", "datablob")
Example #5
def clearAll():
    blob_service = BlobService(account_name, account_key)
    blob_list = getBlobList(blob_service)
    print blob_list
    for blob in blob_list:
        blob_service.delete_blob(container_name, blob)
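getBlobList is not shown in Example #5; a plausible reconstruction that gathers every blob name in the module-level container_name, following the pagination pattern from Example #4 (the real helper's signature is an assumption):

def getBlobList(blob_service):
    # Hypothetical helper: collect all blob names, page by page.
    names = []
    marker = None
    while True:
        batch = blob_service.list_blobs(container_name, marker=marker)
        names.extend(b.name for b in batch)
        if not batch.next_marker:
            break
        marker = batch.next_marker
    return names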
Example #6
def delete_object(container, name):
    blob_service = BlobService(AZURE_ACCOUNT_NAME, AZURE_ACCOUNT_KEY)
    blob_service.delete_blob(container, name)
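Hypothetical usage, assuming AZURE_ACCOUNT_NAME and AZURE_ACCOUNT_KEY are defined at module level as the function expects; the container and blob name are illustrative:

delete_object("datacontainer", "obsolete-report.csv")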
Example #7
        move(fullFilePath, stagingDir)

        # the super secret location
        accountKeyFile = open(azureKeyLocation, 'r')
        accountKey = accountKeyFile.read().strip()
        accountKeyFile.close()

        # Get a handle on the Azure Blob Storage account
        azureStorage = BlobService(account_name=azureAccount,
                                   account_key=accountKey)

        checksumFilename = md5FullFilePath + ".md5"

        # Ensure a clean slate for pushing the new data set
        try:
            azureStorage.delete_blob(ingestContainer, filename)
            theLog.write("Existing ingest data blob found, deleting it\n\n")
            theLog.flush()

            if not isClaims:
                azureStorage.delete_blob(ingestContainer, filename.split(".")[0] + ".md5")
                theLog.write("Existing ingest checksum blob found, deleting it\n\n")
                theLog.flush()
        except AzureMissingResourceHttpError:
            pass

        # Try to put the blob out in the wild, provide MD5 for error
        # checking since M$ didn't feel the need to implement a return
        # code for this function

        # On further testing, the "content_md5" is only for header rather
        # than the actual blob content - have to wait for these APIs to mature
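Example #7 breaks off before the upload itself; a hedged reconstruction of the missing step, modeled on the fuller variant in Example #12 (argument order and error handling are assumptions):

        # Hypothetical continuation of the snippet above, not the original code
        try:
            azureStorage.put_block_blob_from_path(ingestContainer, filename,
                                                  fullFilePath)
        except AzureHttpError as e:
            theLog.write("Ingest blob upload failed: %s\n" % str(e))
            theLog.flush()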
Example #8
class AzureFS(LoggingMixIn, Operations):
    """Azure Blob Storage filesystem"""

    blobs = None
    containers = dict()  # <cname, dict(stat:dict,
                         #              files:None|dict<fname, stat>)>
    fds = dict()  # <fd, (path, bytes, dirty)>
    fd = 0

    def __init__(self, account, key):
        self.blobs = BlobService(account, key)
        self.rebuild_container_list()

    def convert_to_epoch(self, date):
        """Converts Tue, 31 Jul 2012 07:17:34 GMT format to epoch"""
        return int(time.mktime(time.strptime(date, TIME_FORMAT)))

    def rebuild_container_list(self):
        cmap = dict()
        cnames = set()
        for c in self.blobs.list_containers():
            date = c.properties.last_modified
            cstat = dict(st_mode=(S_IFDIR | 0755), st_uid=getuid(), st_size=0,
                         st_mtime=self.convert_to_epoch(date))
            cname = c.name
            cmap['/' + cname] = dict(stat=cstat, files=None)
            cnames.add(cname)

        cmap['/'] = dict(files={},
                         stat=dict(st_mode=(S_IFDIR | 0755),
                                     st_uid=getuid(), st_size=0,
                                     st_mtime=int(time.time())))

        self.containers = cmap   # destroys fs tree cache resistant to misses

    def _parse_path(self, path):    # returns </dir, file(=None)>
        if path.count('/') > 1:     # file
            return str(path[:path.rfind('/')]), str(path[path.rfind('/') + 1:])
        else:                       # dir
            pos = path.rfind('/', 1)
            if pos == -1:
                return path, None
            else:
                return str(path[:pos]), None

    def parse_container(self, path):
        base_container = path[1:]   # /abc/def/g --> abc
        if base_container.find('/') > -1:
            base_container = base_container[:base_container.find('/')]
        return str(base_container)

    def _get_dir(self, path, contents_required=False):
        if not self.containers:
            self.rebuild_container_list()

        if path in self.containers and not (contents_required and \
                self.containers[path]['files'] is None):
            return self.containers[path]

        cname = self.parse_container(path)

        if '/' + cname not in self.containers:
            raise FuseOSError(ENOENT)
        else:
            if self.containers['/' + cname]['files'] is None:
                # fetch contents of container
                log.info("------> CONTENTS NOT FOUND: %s" % cname)

                blobs = self.blobs.list_blobs(cname)

                dirstat = dict(st_mode=(S_IFDIR | 0755), st_size=0,
                               st_uid=getuid(), st_mtime=time.time())

                if self.containers['/' + cname]['files'] is None:
                    self.containers['/' + cname]['files'] = dict()

                for f in blobs:
                    blob_name = f.name
                    blob_date = f.properties.last_modified
                    blob_size = long(f.properties.content_length)

                    node = dict(st_mode=(S_IFREG | 0644), st_size=blob_size,
                                st_mtime=self.convert_to_epoch(blob_date),
                                st_uid=getuid())

                    if blob_name.find('/') == -1:  # file just under container
                        self.containers['/' + cname]['files'][blob_name] = node

            return self.containers['/' + cname]
        return None

    def _get_file(self, path):
        d, f = self._parse_path(path)
        dir = self._get_dir(d, True)
        if dir is not None and f in dir['files']:
            return dir['files'][f]

    def getattr(self, path, fh=None):
        d, f = self._parse_path(path)

        if f is None:
            dir = self._get_dir(d)
            return dir['stat']
        else:
            file = self._get_file(path)

            if file:
                return file

        raise FuseOSError(ENOENT)

    # FUSE
    def mkdir(self, path, mode):
        if path.count('/') <= 1:    # create on root
            name = path[1:]

            if not 3 <= len(name) <= 63:
                log.error("Container names can be 3 through 63 chars long.")
                raise FuseOSError(ENAMETOOLONG)
            if name != name.lower():
                log.error("Container names cannot contain uppercase \
                        characters.")
                raise FuseOSError(EACCES)
            if name.count('--') > 0:
                log.error('Container names cannot contain consecutive \
                        dashes (-).')
                raise FuseOSError(EAGAIN)
            #TODO handle all "-"s must be preceded by letter or numbers
            #TODO starts with only letter or number, can contain letter, nr,'-'

            resp = self.blobs.create_container(name)

            if resp:
                self.rebuild_container_list()
                log.info("CONTAINER %s CREATED" % name)
            else:
                log.error("Invalid container name or container already \
                        exists.")
                raise FuseOSError(EACCES)
        else:
            raise FuseOSError(ENOSYS)  # TODO support 2nd+ level mkdirs

    def rmdir(self, path):
        if path.count('/') == 1:
            c_name = path[1:]
            resp = self.blobs.delete_container(c_name)

            if resp:
                if path in self.containers:
                    del self.containers[path]
            else:
                raise FuseOSError(EACCES)
        else:
            raise FuseOSError(ENOSYS)  # TODO support 2nd+ level mkdirs

    def create(self, path, mode):
        node = dict(st_mode=(S_IFREG | mode), st_size=0, st_nlink=1,
                     st_uid=getuid(), st_mtime=time.time())
        d, f = self._parse_path(path)

        if not f:
            log.error("Cannot create files on root level: /")
            raise FuseOSError(ENOSYS)

        dir = self._get_dir(d, True)
        if not dir:
            raise FuseOSError(EIO)
        dir['files'][f] = node

        return self.open(path, data='')     # reusing handler provider

    def open(self, path, flags=0, data=None):
        if data is None:                    # download contents
            c_name = self.parse_container(path)
            f_name = path[path.find('/', 1) + 1:]

            try:
                data = self.blobs.get_blob(c_name, f_name)
            except AzureMissingResourceHttpError:
                dir = self._get_dir('/' + c_name, True)
                if f_name in dir['files']:
                    del dir['files'][f_name]
                raise FuseOSError(ENOENT)
            except AzureException as e:
                log.error("Read blob failed HTTP %d" % e.code)
                raise FuseOSError(EAGAIN)

        self.fd += 1
        self.fds[self.fd] = (path, data, False)

        return self.fd

    def flush(self, path, fh=None):
        if not fh:
            raise FuseOSError(EIO)
        else:
            if fh not in self.fds:
                raise FuseOSError(EIO)
            path = self.fds[fh][0]
            data = self.fds[fh][1]
            dirty = self.fds[fh][2]

            if not dirty:
                return 0     # avoid redundant write

            d, f = self._parse_path(path)
            c_name = self.parse_container(path)

            if data is None:
                data = ''

            try:
                if len(data) < 64 * 1024 * 1024:   # 64 mb
                    self.blobs.put_blob(c_name, f, data, 'BlockBlob')
                else:
                    # divide file by blocks and upload
                    block_size = 8 * 1024 * 1024
                    num_blocks = int(math.ceil(len(data) * 1.0 / block_size))
                    rd = str(random.randint(1, 1e8))
                    block_ids = list()

                    for i in range(num_blocks):
                        part = data[i * block_size:min((i + 1) * block_size,
                            len(data))]
                        block_id = base64.encodestring('%s_%s' % (rd,
                            (8 - len(str(i))) * '0' + str(i)))
                        self.blobs.put_block(c_name, f, part, block_id)
                        block_ids.append(block_id)

                    self.blobs.put_block_list(c_name, f, block_ids)
            except AzureException:
                raise FuseOSError(EAGAIN)

            dir = self._get_dir(d, True)
            if not dir or f not in dir['files']:
                raise FuseOSError(EIO)

            # update local data
            dir['files'][f]['st_size'] = len(data)
            dir['files'][f]['st_mtime'] = time.time()
            self.fds[fh] = (path, data, False)  # mark as not dirty
            return 0

    def release(self, path, fh=None):
        if fh is not None and fh in self.fds:
            del self.fds[fh]

    def truncate(self, path, length, fh=None):
        return 0     # assume done, no need

    def write(self, path, data, offset, fh=None):
        if not fh or fh not in self.fds:
            raise FuseOSError(ENOENT)
        else:
            d = self.fds[fh][1]
            if d is None:
                d = ""
            self.fds[fh] = (self.fds[fh][0], d[:offset] + data, True)
            return len(data)

    def unlink(self, path):
        c_name = self.parse_container(path)
        d, f = self._parse_path(path)

        try:
            self.blobs.delete_blob(c_name, f)

            _dir = self._get_dir(path, True)
            if _dir and f in _dir['files']:
                del _dir['files'][f]
            return 0
        except AzureMissingResourceHttpError:
            raise FuseOSError(ENOENT)
        except Exception as e:
            raise FuseOSError(EAGAIN)

    def readdir(self, path, fh):
        if path == '/':
            return ['.', '..'] + [x[1:] for x in self.containers.keys() \
                    if x != '/']

        dir = self._get_dir(path, True)
        if not dir:
            raise FuseOSError(ENOENT)
        return ['.', '..'] + dir['files'].keys()

    def read(self, path, size, offset, fh):
        if not fh or fh not in self.fds:
            raise FuseOSError(ENOENT)

        f_name = path[path.find('/', 1) + 1:]
        c_name = path[1:path.find('/', 1)]

        try:
            data = self.blobs.get_blob(c_name, f_name)
            self.fds[fh] = (self.fds[fh][0], data, False)
            return data[offset:offset + size]
        except URLError, e:
            if e.code == 404:
                raise FuseOSError(ENOENT)
            elif e.code == 403:
                raise FuseOSError(EPERM)
            else:
                log.error("Read blob failed HTTP %d" % e.code)
                raise FuseOSError(EAGAIN)
        data = self.fds[fh][1]
        if data is None:
            data = ""
        return data[offset:offset + size]
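A hedged sketch of how a class like AzureFS would be mounted with fusepy, whose FUSE constructor drives an Operations instance; account, key, and mountpoint are placeholders:

from fuse import FUSE

if __name__ == '__main__':
    fs = AzureFS('myaccount', 'bXlrZXk=')   # placeholder credentials
    FUSE(fs, '/mnt/azure', foreground=True, nothreads=True)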
Example #9
class BlobSource(DataSource):
    def __init__(self):
        self.storage_account = getenv('STORAGE_ACCOUNT')
        self.blob_service = BlobService(self.storage_account,
                                        getenv('STORAGE_KEY'))

    def load(self, sparkContext, container, path):
        path = ('/' if path[0] != '/' else '') + path
        uri = 'wasb://%s@%s.blob.core.windows.net%s' % (
            container, self.storage_account, path)
        print 'Loading from %s' % uri
        return sparkContext.textFile(uri)

    def download(self, container, path):
        print 'Downloading blob from %s/%s' % (container, path)
        self.blob_service.get_blob_to_path(container, path, path)
        print 'Downloaded blob to ' + path

    def saveAsJson(self, payload, container, path):
        path = path.lstrip('/')
        print path
        print 'Saving to %s/%s' % (container, path)
        json_string = json.dumps(payload, ensure_ascii=False).encode('utf-8')
        try:
            self.blob_service.put_blob(
                container,
                path,
                json_string,
                'BlockBlob',
                x_ms_blob_cache_control='max-age=3600',
                x_ms_blob_content_type='application/json')
        except Exception as e:
            print 'Failed to save %s/%s: %s' % (container, path, str(e))
            raise

    def saveAsText(self, rdd, container, path):
        path = path.lstrip('/')
        path = '/' + path
        print 'Saving rdd to %s%s' % (container, path)
        uri = 'wasb://%s@%s.blob.core.windows.net%s' % (
            container, self.storage_account, path)
        try:
            rdd.saveAsTextFile(uri)
        except Exception as e:
            print 'Failed to save %s%s: %s' % (container, path, str(e))
            raise

    def deleteAllBut(self, container, exceptFolderName):
        print 'deleteAllBut called'
        try:
            bloblistingresult = self.blob_service.list_blobs(container)
            for i in bloblistingresult:
                print i.name
                if not exceptFolderName in i.name:
                    try:
                        print 'deleting'
                        self.blob_service.delete_blob(container, i.name)
                        print 'deleted'
                    except Exception as e:
                        print 'Failed to delete %s/%s: %s' % (container,
                                                              i.name, str(e))
                        raise
        except Exception as e:
            print 'Failed to list things in %s: %s' % (container, str(e))
            raise
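Hypothetical use of BlobSource from a PySpark driver, assuming the STORAGE_ACCOUNT and STORAGE_KEY environment variables are set and the cluster ships the wasb:// Hadoop connector:

src = BlobSource()
rdd = src.load(sc, 'datacontainer', 'events/2015/11/03')   # sc: an existing SparkContext
src.saveAsJson({'status': 'ok'}, 'datacontainer', 'results/status.json')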
Example #11
class AzureFS(LoggingMixIn, Operations):
    """
    Azure Blob Storage filesystem
    """

    blobs = None
    containers = dict()  # {cname: {stat:dict, files:None|{fname: stat}}
    fd = 0

    def __init__(self, account, key):
        self.blobs = BlobService(account, key)
        self._rebuild_container_list()

    def _rebuild_container_list(self):
        cmap = dict()
        cnames = set()

        for c in self.blobs.list_containers():
            cstat = make_stat(stat.S_IFDIR | 0755, c.properties)

            cname = c.name
            cmap['/' + cname] = dict(stat=cstat, files=None)
            cnames.add(cname)

        cmap['/'] = dict(files={}, stat=make_stat(stat.S_IFDIR | 0755))
        self.containers = cmap  # destroys fs tree cache resistant to misses

    @staticmethod
    def _parse_path(path):  # returns </dir, file(=None)>
        if path.count('/') > 1:  # file
            return str(path[:path.rfind('/')]), str(path[path.rfind('/') + 1:])
        else:  # dir
            pos = path.rfind('/', 1)
            if pos == -1:
                return path, None
            else:
                return str(path[:pos]), None

    @staticmethod
    def _parse_container(path):
        base_container = path[1:]  # /abc/def/g --> abc
        if base_container.find('/') > -1:
            base_container = base_container[:base_container.find('/')]
        return str(base_container)

    def _get_dir(self, path, contents_required=False, force=False):
        log.debug("get_dir: contents_required=%s, force=%s,"
                  " has_container=%s, path=%s",
                  "t" if contents_required else "f",
                  "t" if force else "f",
                  "t" if path in self.containers else "f",
                  path)
        cname = self._parse_container(path)

        centry = self.containers.get('/' + cname)
        if centry is not None and centry.get('process') is not None:
            p = centry['process']
            if not p.is_alive():
                p.join()
                centry['process'] = None

        if not self.containers:
            log.info("get_dir: rebuilding container list")
            self._rebuild_container_list()

        if path in self.containers:
            container = self.containers[path]
            if not contents_required:
                return container
            if not force and container['files'] is not None:
                return container

        if '/' + cname not in self.containers:
            log.info("get_dir: no such container: /%s", cname)
            raise FuseOSError(errno.ENOENT)
        else:
            container = self.containers['/' + cname]
            try:
                log.info(">>>> %s - %s ",
                         cname,
                         container['process'])
            except KeyError:
                log.info(">>>> no process: %s " % cname)
            if container['files'] is None or force is True:
                # fetch contents of container
                log.info("Contents not found in the cache index: %s", cname)

                process = container.get('process', None)
                if process is not None and process.is_alive():
                    # We do nothing. Some thread is still working,
                    # getting list of blobs from the container.
                    log.info("Fetching blob list for '%s' is already"
                             " handled by %s", cname, process)
                else:
                    # No thread running for this container, launch a new one
                    m = Manager()
                    files = m.dict()
                    process = Process(target=get_files_from_blob_service,
                                      args=(self.blobs, cname, files),
                                      name="list-blobs/%s" % cname)
                    process.daemon = True
                    process.start()
                    container['process'] = process
                    log.info("Started blob list retrieval for '%s': %s",
                             cname, process)
                    container['files'] = files
            return container

    def _get_file(self, path):
        d, f = self._parse_path(path)
        log.debug("get_file: requested path=%s (d=%s, f=%s)", path, d, f)
        directory = self._get_dir(d, True)
        files = None
        if directory is not None:
            files = directory['files']
            if f in files:
                return files[f]

        if not hasattr(self, "_get_file_noent"):
            self._get_file_noent = {}

        last_check = self._get_file_noent.get(path, 0)
        if time.time() - last_check <= 30:
            # Negative TTL is 30 seconds (hardcoded for now)
            log.info("get_file: cache says to reply negative for %s", path)
            return None

        # Check if file now exists and our caches are just stale.
        try:
            c = self._parse_container(d)
            p = path[path.find('/', 1) + 1:]
            props = self.blobs.get_blob_properties(c, p)
            log.info("get_file: found locally unknown remote file %s: %s",
                     path, repr(props))

            node = make_stat(stat.S_IFREG | 0644, props)

            if node['st_size'] > 0:
                log.info("get_file: properties for %s: %s", path, repr(node))
                # Remember this, so we won't have to re-query it.
                files[f] = node
                if path in self._get_file_noent:
                    del self._get_file_noent[path]
                return node
            else:
                # TODO: FIXME: HACK: We currently ignore empty files.
                # Sometimes the file is not yet here and is still uploading.
                # Such files have "content-length: 0". Ignore those for now.
                log.warning("get_file: the file %s is not yet here (size=%s)",
                            path, node['st_size'])
                self._get_file_noent[path] = time.time()
                return None
        except AzureMissingResourceHttpError:
            log.info("get_file: remote confirms non-existence of %s", path)
            self._get_file_noent[path] = time.time()
            return None
        except AzureException as e:
            log.error("get_file: exception while querying remote for %s: %s",
                      path, repr(e))
            self._get_file_noent[path] = time.time()

        return None

    def getattr(self, path, fh=None):
        log.debug("getattr: path=%s", path)
        d, f = self._parse_path(path)

        if f is None:
            return self._get_dir(d)['stat']
        else:
            file_obj = self._get_file(path)
            if file_obj:
                return file_obj

        log.warning("getattr: no such file: %s", path)
        raise FuseOSError(errno.ENOENT)

    def mkdir(self, path, mode):
        if path.count('/') <= 1:  # create on root
            name = path[1:]
            if not 3 <= len(name) <= 63:
                log.error("Container names can be 3 through 63 chars long")
                raise FuseOSError(errno.ENAMETOOLONG)

            if not re.match(RE_CONTAINER_NAME, name):
                log.error("Invalid container name: '%s'", name)
                raise FuseOSError(errno.EACCES)

            resp = self.blobs.create_container(name)
            if resp:
                self._rebuild_container_list()
                log.info("CONTAINER %s CREATED", name)
            else:
                log.error("Invalid container name or container already exists")
                raise FuseOSError(errno.EACCES)
        else:
            # TODO: Support 2nd+ level directory creation
            raise FuseOSError(errno.ENOSYS)

    def rmdir(self, path):
        if path.count('/') == 1:
            c_name = path[1:]
            resp = self.blobs.delete_container(c_name)

            if resp:
                if path in self.containers:
                    del self.containers[path]
            else:
                raise FuseOSError(errno.EACCES)
        else:
            # TODO: Support 2nd+ level directories
            raise FuseOSError(errno.ENOSYS)

    def create(self, path, mode, fi=None):
        node = make_stat(stat.S_IFREG | mode)
        d, f = self._parse_path(path)

        if not f:
            log.error("Cannot create files on root level: /")
            raise FuseOSError(errno.ENOSYS)

        if f == ".__refresh_cache__":
            log.info("Refresh cache forced (%s)" % f)
            self._get_dir(path, True, True)
            return self.open(path, data='')

        directory = self._get_dir(d, True)
        if not directory:
            raise FuseOSError(errno.EIO)
        directory['files'][f] = node

        return self.open(path, data='')  # reusing handler provider

    def open(self, path, flags=0, data=None):
        log.info("open: path=%s; flags=%s", path, flags)
        if data is None:
            # Download contents
            c_name = self._parse_container(path)
            f_name = path[path.find('/', 1) + 1:]

            try:
                self.blobs.get_blob_metadata(c_name, f_name)
            except AzureMissingResourceHttpError:
                directory = self._get_dir('/' + c_name, True)
                if f_name in directory['files']:
                    del directory['files'][f_name]
                log.info("open: remote says there is no such file: c=%s f=%s",
                         c_name, f_name)
                raise FuseOSError(errno.ENOENT)
            except AzureHttpError as e:
                log.error("Read blob failed with HTTP %d", e.status_code)
                raise FuseOSError(errno.EAGAIN)
            except AzureException as e:
                log.exception("Read blob failed with exception: %s", repr(e))
                raise FuseOSError(errno.EAGAIN)
        self.fd += 1
        return self.fd

    def truncate(self, path, length, fh=None):
        return 0  # assume done, no need

    def write(self, path, data, offset, fh=None):
        # TODO: Re-implement writing
        raise FuseOSError(errno.EPERM)

    def unlink(self, path):
        c_name = self._parse_container(path)
        d, f = self._parse_path(path)

        try:
            self.blobs.delete_blob(c_name, f)

            _dir = self._get_dir(path, True)
            if _dir and f in _dir['files']:
                del _dir['files'][f]
            return 0
        except AzureMissingResourceHttpError:
            raise FuseOSError(errno.ENOENT)
        except Exception:
            raise FuseOSError(errno.EAGAIN)

    def readdir(self, path, fh):
        if path == '/':
            return ['.', '..'] + [x[1:] for x in self.containers.keys()
                                  if x != '/']

        directory = self._get_dir(path, True)
        if not directory:
            log.info("readdir: no such file: %s", path)
            raise FuseOSError(errno.ENOENT)
        return ['.', '..'] + directory['files'].keys()

    def read(self, path, size, offset, fh):
        f_name = path[path.find('/', 1) + 1:]
        c_name = path[1:path.find('/', 1)]

        try:
            byte_range = "bytes=%s-%s" % (offset, offset + size - 1)
            log.debug("read range: %s", byte_range)
            data = self.blobs.get_blob(c_name, f_name, snapshot=None,
                                       x_ms_range=byte_range)
            return data
        except AzureHttpError as e:
            if e.status_code == 404:
                raise FuseOSError(errno.ENOENT)
            elif e.status_code == 403:
                raise FuseOSError(errno.EPERM)
            else:
                log.error("Read blob failed with HTTP %d", e.status_code)
                raise FuseOSError(errno.EAGAIN)

    def statfs(self, path):
        return dict(f_bsize=4096, f_blocks=1, f_bavail=sys.maxint)

    def rename(self, old, new):
        # TODO: Implement renaming
        raise FuseOSError(errno.ENOSYS)

    def symlink(self, target, source):
        raise FuseOSError(errno.ENOSYS)

    def getxattr(self, path, name, position=0):
        return ''

    def chmod(self, path, mode):
        pass

    def chown(self, path, uid, gid):
        pass
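Example #11 leans on a make_stat helper and an RE_CONTAINER_NAME pattern that are not shown; a rough sketch inferred from the call sites (field handling and the regex are guesses, not the original code):

import re
import time
from os import getuid

RE_CONTAINER_NAME = re.compile(r'^[a-z0-9](?:-?[a-z0-9])+$')   # approximation of Azure's naming rules

def make_stat(mode, props=None):
    # Build a stat dict; when blob/container properties are given, derive size and mtime.
    st = dict(st_mode=mode, st_uid=getuid(), st_size=0, st_mtime=time.time())
    if props is not None:
        st['st_size'] = int(getattr(props, 'content_length', 0) or 0)
        if getattr(props, 'last_modified', None):
            st['st_mtime'] = time.mktime(time.strptime(
                props.last_modified, '%a, %d %b %Y %H:%M:%S GMT'))
    return st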
Example #12
    # Get a handle on the Azure Blob Storage account
    azureStorage = BlobService(account_name=azureAccount, 
                               account_key=accountKey)

    # Create a datestring for the filenames
    dateString = date.today().strftime("%Y%m%d")

    # Create a filename string (e.g. Allergies20151103)
    targetFile = "{0}{1}".format(dataSetType, dateString)

    # Create full paths for the location
    targetIngestFullPath = "{0}/{1}.txt".format(targetIngestPath, targetFile)

    # Ensure a clean slate for pushing the new data set
    try:
        azureStorage.delete_blob(ingestContainer, targetIngestFullPath)
        theLog.write("Existing ingest blob found, deleting it\n\n")
        theLog.flush()
    except AzureMissingResourceHttpError:
        pass

    # Try to put the blob out in the wild. Ideally we would supply an MD5
    # for error checking, since the API does not return a status code for
    # this call.

    # On further testing, "content_md5" only covers the request headers
    # rather than the actual blob content - that check has to wait for
    # these APIs to mature.
    try:
        azureStorage.put_block_blob_from_path(ingestContainer,
                                              targetIngestFullPath,
                                              fullFilePath)
    except AzureHttpError:
        theLog.write("Failed to upload the ingest blob\n\n")
        theLog.flush()
        raise
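
Because put_block_blob_from_path returns nothing, one way to confirm the upload landed is to read the blob's properties back. A sketch reusing the same variable names (on the legacy SDK, get_blob_properties returns the response headers as a dict and raises AzureMissingResourceHttpError when the blob is absent):

    # Hypothetical post-upload check, reusing the names from the snippet above.
    try:
        props = azureStorage.get_blob_properties(ingestContainer,
                                                 targetIngestFullPath)
        theLog.write("Upload verified: %s bytes\n\n" %
                     props.get('content-length'))
    except AzureMissingResourceHttpError:
        theLog.write("Upload verification failed: blob not found\n\n")
    theLog.flush()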
Example #13
class Command(BaseCommand):
    help = "Synchronizes static media to cloud files."

    option_list = BaseCommand.option_list + (
        optparse.make_option('-w', '--wipe',
            action='store_true', dest='wipe', default=False,
            help="Wipes out entire contents of container first."),
        optparse.make_option('-t', '--test-run',
            action='store_true', dest='test_run', default=False,
            help="Performs a test run of the sync."),
        optparse.make_option('-c', '--container',
            dest='container', help="Override STATIC_CONTAINER."),
    )

    # settings from azurite.settings
    ACCOUNT_NAME     = AZURITE['ACCOUNT_NAME']
    ACCOUNT_KEY      = AZURITE['ACCOUNT_KEY']
    STATIC_CONTAINER = AZURITE['STATIC_CONTAINER']

    # paths
    DIRECTORY        = os.path.abspath(settings.STATIC_ROOT)
    STATIC_URL       = settings.STATIC_URL

    if not DIRECTORY.endswith('/'):
        DIRECTORY = DIRECTORY + '/'

    if STATIC_URL.startswith('/'):
        STATIC_URL = STATIC_URL[1:]

    local_object_names = []
    create_count = 0
    upload_count = 0
    update_count = 0
    skip_count = 0
    delete_count = 0
    service = None

    def handle(self, *args, **options):
        self.wipe = options.get('wipe')
        self.test_run = options.get('test_run')
        self.verbosity = int(options.get('verbosity'))
        # allow -c/--container to override the default STATIC_CONTAINER
        if options.get('container'):
            self.STATIC_CONTAINER = options.get('container')
        self.sync_files()

    def sync_files(self):
        self.service = BlobService(account_name=self.ACCOUNT_NAME,
            account_key=self.ACCOUNT_KEY)

        try:
            self.service.get_container_properties(self.STATIC_CONTAINER)
        except AzureMissingResourceHttpError:
            self.service.create_container(self.STATIC_CONTAINER,
                x_ms_blob_public_access='blob')

        self.service.set_container_acl(self.STATIC_CONTAINER,
            x_ms_blob_public_access='blob')

        # if -w option is provided, wipe out the contents of the container
        if self.wipe:
            blob_count = len(self.service.list_blobs(self.STATIC_CONTAINER))

            if self.test_run:
                print "Wipe would delete %d objects." % blob_count
            else:
                print "Deleting %d objects..." % blob_count
                for blob in self.service.list_blobs(self.STATIC_CONTAINER):
                    self.service.delete_blob(self.STATIC_CONTAINER, blob.name)

        # walk through the directory, creating or updating files on the cloud;
        # os.path.walk (Python 2) passes the third argument through to
        # upload_files unused
        os.path.walk(self.DIRECTORY, self.upload_files, "foo")

        # remove any files on remote that don't exist locally
        self.delete_files()

        # print out the final tally to the cmd line
        self.update_count = self.upload_count - self.create_count
        print
        if self.test_run:
            print "Test run complete with the following results:"
        print "Skipped %d. Created %d. Updated %d. Deleted %d." % (
            self.skip_count, self.create_count, self.update_count, self.delete_count)

    def upload_files(self, arg, dirname, names):
        # upload or skip items
        for item in names:
            file_path = os.path.join(dirname, item)
            if os.path.isdir(file_path):
                continue # Don't try to upload directories

            object_name = self.STATIC_URL + file_path.split(self.DIRECTORY)[1]
            self.local_object_names.append(object_name)

            try:
                properties = self.service.get_blob_properties(self.STATIC_CONTAINER,
                    object_name)
            except AzureMissingResourceHttpError:
                properties = {}
                self.create_count += 1

            cloud_datetime = None
            if 'last-modified' in properties:
                cloud_datetime = (properties['last-modified'] and
                                  datetime.datetime.strptime(
                                    properties['last-modified'],
                                    "%a, %d %b %Y %H:%M:%S %Z"
                                  ) or None)

            local_datetime = datetime.datetime.utcfromtimestamp(
                                               os.stat(file_path).st_mtime)

            if cloud_datetime and local_datetime < cloud_datetime:
                self.skip_count += 1
                if self.verbosity > 1:
                    print "Skipped %s: not modified." % object_name
                continue

            if not self.test_run:
                file_contents = open(file_path, 'rb').read()
                content_type, encoding = mimetypes.guess_type(file_path)
                self.service.put_blob(self.STATIC_CONTAINER, object_name, file_contents,
                    x_ms_blob_type='BlockBlob', x_ms_blob_content_type=content_type,
                    content_encoding=encoding)
                # sync_headers(cloud_obj)
            self.upload_count += 1
            if self.verbosity > 1:
                print "Uploaded", object_name

    def delete_files(self):
        # remove any objects in the container that don't exist locally
        for blob in self.service.list_blobs(self.STATIC_CONTAINER):
            if blob.name not in self.local_object_names:
                self.delete_count += 1
                if self.verbosity > 1:
                    print "Deleted %s" % blob.name
                if not self.test_run:
                    self.service.delete_blob(self.STATIC_CONTAINER, blob.name)
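
Assuming the file lives under a Django app's management/commands/ directory (the "syncstatic" command name below is an assumption based on django-azurite's conventions), a dry run and a real sync can be driven from code as well as from the command line:

# Hypothetical invocation via Django's management API; the command name
# is an assumption.
from django.core.management import call_command

call_command('syncstatic', test_run=True, verbosity=2)  # report, change nothing
call_command('syncstatic', wipe=True)                   # wipe, then re-upload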
Example #14
class Azure(object):
    '''
    A class used to connect to the Azure storage and
    upload/download files using blob storage
    '''
    def __init__(self, params=None):
        '''
        Constructor for the Azure object
        @param params - optional dict with "user" and "key" credentials
        '''
        params = params or {}
        if "user" in params:
            self.user = params["user"]
        else:
            self.user = None
        if "key" in params:
            self.key = params["key"]
        else:
            self.key = None

    def connect(self, host, port, user, password, secure):
        '''
        Connect to the Azure service with the given credentials
        @param host - host base for the service endpoint (may be None)
        @param port - unused, kept for interface compatibility
        @param user - account name (falls back to self.user)
        @param password - account key (falls back to self.key)
        @param secure - use https when True, http otherwise
        '''
        kwargs = {}
        err = None
        if host is not None:
            kwargs["host_base"] = "." + host
        if user is not None:
            kwargs["account_name"] = user
        elif self.user is not None:
            kwargs["account_name"] = self.user
        if password is not None:
            kwargs["account_key"] = password
        elif self.key is not None:
            kwargs["account_key"] = self.key
        kwargs["protocol"] = "https" if secure else "http"
        try:
            self.service = BlobService(**kwargs)
        except Exception as e:
            err = e.message
            self.service = None
        if self.service is None:
            raise OsakaException("Failed to connect to Azure:" +
                                 ("" if err is None else err))

    @classmethod
    def getSchemes(clazz):
        '''
        Returns a list of schemes this handler handles
        Note: handling the scheme of another handler produces unknown results
        @returns list of handled schemes
        '''
        return ["azure", "azures"]

    def close(self):
        '''
        Close this service
        '''
        pass

    def put(self, path, url):
        '''
        Put a file up to the cloud
        @param path - local path to upload
        @param url - destination path in the cloud
        '''
        if os.path.isdir(path):
            return walk(self.put, path, url)
        cont, blob = get_container_and_path(urlparse.urlparse(url).path)
        self.service.create_container(cont)
        self.service.put_block_blob_from_path(cont, blob, path)
        return True

    def get(self, url, dest):
        '''
        Get file(s) from the cloud
        @param url - url on the cloud to pull down
        @param dest - local destination to download to
        '''
        cont, blob = get_container_and_path(urlparse.urlparse(url).path)
        for b in self.service.list_blobs(cont, prefix=blob):
            destination = os.path.join(dest, os.path.relpath(
                b.name, blob)) if blob != b.name else dest
            if not os.path.exists(os.path.dirname(destination)):
                os.makedirs(os.path.dirname(destination))  # handle nested prefixes
            self.service.get_blob_to_path(cont, b.name, destination)
        return True

    def rm(self, url):
        '''
        Remove this url and all children urls
        @param url - url to remove
        '''
        cont, blob = get_container_and_path(urlparse.urlparse(url).path)
        for b in self.service.list_blobs(cont, prefix=blob):
            self.service.delete_blob(cont, b.name)
        return True
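
A minimal round trip with this handler, assuming the surrounding module provides get_container_and_path and OsakaException (the account, key, and container below are placeholders):

# Hypothetical usage sketch; credentials and URLs are placeholders.
store = Azure({"user": "myaccount", "key": "bXlrZXk="})
store.connect(host="blob.core.windows.net", port=None,
              user=None, password=None, secure=True)

store.put("/tmp/report.txt", "azures://host/mycontainer/report.txt")
store.get("azures://host/mycontainer/report.txt", "/tmp/report-copy.txt")
store.rm("azures://host/mycontainer/report.txt")
store.close()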