def _deleteBlob(self, storageAccount, mediaLink): primary = self._getStorageAccountKey(storageAccount) # BlobServiceオブジェクトを作成 blobService = BlobService(storageAccount, primary) (container, blob) = self._getBlobFromMediaLink(blobService, mediaLink) rs = blobService.delete_blob(container_name=container.name, blob_name=blob.name) try: updatedBlob = blobService.get_blob_properties(container_name=container.name, blob_name=blob.name) except WindowsAzureMissingResourceError as e: return True return False
def uri_get_file(creds, uri, conn=None): assert uri.startswith('wabs://') url_tup = urlparse(uri) if conn is None: conn = BlobService(creds.account_name, creds.account_key, protocol='https') # Determin the size of the target blob props = conn.get_blob_properties(url_tup.netloc, url_tup.path) blob_size = int(props['content-length']) ret_size = 0 data = '' # WABS requires large files to be downloaded in 4MB chunks while ret_size < blob_size: ms_range = 'bytes={}-{}'.format(ret_size, ret_size + WABS_CHUNK_SIZE - 1) while True: # Because we're downloading in chunks, catch rate limiting and # connection errors here instead of letting them bubble up to the # @retry decorator so that we don't have to start downloading the # whole file over again. try: part = conn.get_blob(url_tup.netloc, url_tup.path, x_ms_range=ms_range) except EnvironmentError as e: if e.errno in (errno.EBUSY, errno.ECONNRESET): logger.warning( msg="retrying after encountering exception", detail=("Exception traceback:\n{0}".format( traceback.format_exception(*sys.exc_info()))), hint="") gevent.sleep(30) else: raise else: break length = len(part) ret_size += length data += part if length > 0 and length < WABS_CHUNK_SIZE: break elif length == 0: break return data
def _leaseBlob(self, storageAccount, mediaLink): primary = self._getStorageAccountKey(storageAccount) # BlobServiceオブジェクトを作成 blobService = BlobService(storageAccount, primary) (container, blob) = self._getBlobFromMediaLink(blobService, mediaLink) prop = blob.properties # Lease StatusがlockedだったらBlobのリース解放を試みる if prop.lease_status == 'locked': # unlockedの時に実行すると、 azure.WindowsAzureConflictError res = blobService.lease_blob(container_name=container.name, blob_name=blob.name, x_ms_lease_action='break') # (成功すると?){}が返ってくる updatedBlob = blobService.get_blob_properties(container_name=container.name, blob_name=blob.name)
def _deleteBlob(self, storageAccount, mediaLink): primary = self._getStorageAccountKey(storageAccount) # BlobServiceオブジェクトを作成 blobService = BlobService(storageAccount, primary) (container, blob) = self._getBlobFromMediaLink(blobService, mediaLink) rs = blobService.delete_blob(container_name=container.name, blob_name=blob.name) try: updatedBlob = blobService.get_blob_properties( container_name=container.name, blob_name=blob.name) except WindowsAzureMissingResourceError as e: return True return False
def uri_get_file(creds, uri, conn=None): assert uri.startswith('wabs://') url_tup = urlparse(uri) if conn is None: conn = BlobService(creds.account_name, creds.account_key, protocol='https') # Determin the size of the target blob props = conn.get_blob_properties(url_tup.netloc, url_tup.path) blob_size = int(props['content-length']) ret_size = 0 data = '' # WABS requires large files to be downloaded in 4MB chunks while ret_size < blob_size: ms_range = 'bytes={0}-{1}'.format(ret_size, ret_size + WABS_CHUNK_SIZE - 1) while True: # Because we're downloading in chunks, catch rate limiting and # connection errors here instead of letting them bubble up to the # @retry decorator so that we don't have to start downloading the # whole file over again. try: part = conn.get_blob(url_tup.netloc, url_tup.path, x_ms_range=ms_range) except EnvironmentError as e: if e.errno in (errno.EBUSY, errno.ECONNRESET): logger.warning( msg="retrying after encountering exception", detail=("Exception traceback:\n{0}".format( traceback.format_exception(*sys.exc_info()))), hint="") gevent.sleep(30) else: raise else: break length = len(part) ret_size += length data += part if length > 0 and length < WABS_CHUNK_SIZE: break elif length == 0: break return data
def _leaseBlob(self, storageAccount, mediaLink): primary = self._getStorageAccountKey(storageAccount) # BlobServiceオブジェクトを作成 blobService = BlobService(storageAccount, primary) (container, blob) = self._getBlobFromMediaLink(blobService, mediaLink) prop = blob.properties # Lease StatusがlockedだったらBlobのリース解放を試みる if prop.lease_status == 'locked': # unlockedの時に実行すると、 azure.WindowsAzureConflictError res = blobService.lease_blob(container_name=container.name, blob_name=blob.name, x_ms_lease_action='break') # (成功すると?){}が返ってくる updatedBlob = blobService.get_blob_properties( container_name=container.name, blob_name=blob.name)
if (targetBlob is None): logger.error('deleteOSandDataDisk.py: target blob(%s) is not found.' % mediaLink.split('://')[1]) sys.exit() #----------------- # lease開始 logger.debug("deleteOSandDataDisk.py: Blob mediaLink of OS or Data Disk leasing...(%s-->%s)" % (targetContainer.name, targetBlob.name)) prop = targetBlob.properties # Lease StatusがlockedだったらBlobのリース解放を試みる if prop.lease_status == 'locked': # unlockedの時に実行すると、 azure.WindowsAzureConflictError res = blobService.lease_blob(container_name=targetContainer.name, blob_name=targetBlob.name, x_ms_lease_action='break') # (成功すると?){}が返ってくる updatedBlob = blobService.get_blob_properties(container_name=targetContainer.name, blob_name=targetBlob.name) logger.debug("deleteOSandDataDisk.py: Blob mediaLink of OS or Data Disk leased.(%s-->%s)" % (targetContainer.name, targetBlob.name)) logger.debug("sleeping... 120sec") time.sleep(120) #----------------- # osHardDiskをOSディスクリストから削除 # if ( osHardDiskName is not None ): logger.debug("deleteOSandDataDisk.py: OS Disk deleting...(%s)" % osHardDiskName) try: systemProperty = sms.delete_disk(osHardDiskName, True) except WindowsAzureMissingResourceError as e: # クラウドサービス/デプロイメントが存在しない(_ERROR_NOT_FOUND)
class Storage(driver.Base): supports_bytes_range = True def __init__(self, path=None, config=None): self._config = config self._container = self._config.azure_storage_container protocol = 'https' if self._config.azure_use_https else 'http' acct_name = self._config.azure_storage_account_name acct_key = self._config.azure_storage_account_key self._blob = BlobService(account_name=acct_name, account_key=acct_key, protocol=protocol) self._init_container() logger.debug("Initialized azureblob storage driver") def _init_container(self): '''Initializes image container on Azure blob storage if the container does not exist. ''' created = self._blob.create_container(self._container, x_ms_blob_public_access='blob', fail_on_exist=False) if created: logger.info('Created blob container for image registry.') else: logger.debug('Registry container already exists.') return created @lru.get def get_content(self, path): try: return self._blob.get_blob(self._container, path) except azure.WindowsAzureMissingResourceError: raise exceptions.FileNotFoundError('%s is not there' % path) @lru.set def put_content(self, path, content): self._blob.put_blob(self._container, path, content, 'BlockBlob') return path def stream_read(self, path, bytes_range=None): try: f = io.BytesIO() self._blob.get_blob_to_file(self._container, path, f) if bytes_range: f.seek(bytes_range[0]) total_size = bytes_range[1] - bytes_range[0] + 1 else: f.seek(0) while True: buf = None if bytes_range: # Bytes Range is enabled buf_size = self.buffer_size if nb_bytes + buf_size > total_size: # We make sure we don't read out of the range buf_size = total_size - nb_bytes if buf_size > 0: buf = f.read(buf_size) nb_bytes += len(buf) else: # We're at the end of the range buf = '' else: buf = f.read(self.buffer_size) if not buf: break yield buf except IOError: raise exceptions.FileNotFoundError('%s is not there' % path) def stream_write(self, path, fp): self._blob.put_block_blob_from_file(self._container, path, fp) def list_directory(self, path=None): if not path.endswith('/'): path += '/' # path=a would list a/b.txt as well as 'abc.txt' blobs = list(self._blob.list_blobs(self._container, path)) if not blobs: raise exceptions.FileNotFoundError('%s is not there' % path) return [b.name for b in blobs] def exists(self, path): try: self._blob.get_blob_properties(self._container, path) return True except azure.WindowsAzureMissingResourceError: return False @lru.remove def remove(self, path): is_blob = self.exists(path) if is_blob: self._blob.delete_blob(self._container, path) return exists = False blobs = list(self._blob.list_blobs(self._container, path)) if not blobs: raise exceptions.FileNotFoundError('%s is not there' % path) for b in blobs: self._blob.delete_blob(self._container, b.name) def get_size(self, path): try: properties = self._blob.get_blob_properties(self._container, path) return int(properties['content-length']) # auto-converted to long except azure.WindowsAzureMissingResourceError: raise exceptions.FileNotFoundError('%s is not there' % path)
class AzureBackend(duplicity.backend.Backend): """ Backend for Azure Blob Storage Service """ def __init__(self, parsed_url): duplicity.backend.Backend.__init__(self, parsed_url) # Import Microsoft Azure Storage SDK for Python library. try: import azure import azure.storage if hasattr(azure.storage, 'BlobService'): # v0.11.1 and below from azure.storage import BlobService self.AzureMissingResourceError = azure.WindowsAzureMissingResourceError self.AzureConflictError = azure.WindowsAzureConflictError else: # v1.0.0 and above import azure.storage.blob if hasattr(azure.storage.blob, 'BlobService'): from azure.storage.blob import BlobService else: from azure.storage.blob.blockblobservice import BlockBlobService as BlobService self.AzureMissingResourceError = azure.common.AzureMissingResourceHttpError self.AzureConflictError = azure.common.AzureConflictHttpError except ImportError as e: raise BackendException("""\ Azure backend requires Microsoft Azure Storage SDK for Python (https://pypi.python.org/pypi/azure-storage/). Exception: %s""" % str(e)) # TODO: validate container name self.container = parsed_url.path.lstrip('/') if 'AZURE_ACCOUNT_NAME' not in os.environ: raise BackendException('AZURE_ACCOUNT_NAME environment variable not set.') if 'AZURE_ACCOUNT_KEY' in os.environ: if 'AZURE_ENDPOINT_SUFFIX' in os.environ: self.blob_service = BlobService(account_name=os.environ['AZURE_ACCOUNT_NAME'], account_key=os.environ['AZURE_ACCOUNT_KEY'], endpoint_suffix=os.environ['AZURE_ENDPOINT_SUFFIX']) else: self.blob_service = BlobService(account_name=os.environ['AZURE_ACCOUNT_NAME'], account_key=os.environ['AZURE_ACCOUNT_KEY']) self._create_container() elif 'AZURE_SHARED_ACCESS_SIGNATURE' in os.environ: if 'AZURE_ENDPOINT_SUFFIX' in os.environ: self.blob_service = BlobService(account_name=os.environ['AZURE_ACCOUNT_NAME'], sas_token=os.environ['AZURE_SHARED_ACCESS_SIGNATURE'], endpoint_suffix=os.environ['AZURE_ENDPOINT_SUFFIX']) else: self.blob_service = BlobService(account_name=os.environ['AZURE_ACCOUNT_NAME'], sas_token=os.environ['AZURE_SHARED_ACCESS_SIGNATURE']) else: raise BackendException( 'Neither AZURE_ACCOUNT_KEY nor AZURE_SHARED_ACCESS_SIGNATURE environment variable not set.') if globals.azure_max_single_put_size: # check if we use azure-storage>=0.30.0 try: _ = self.blob_service.MAX_SINGLE_PUT_SIZE self.blob_service.MAX_SINGLE_PUT_SIZE = globals.azure_max_single_put_size # fallback for azure-storage<0.30.0 except AttributeError: self.blob_service._BLOB_MAX_DATA_SIZE = globals.azure_max_single_put_size if globals.azure_max_block_size: # check if we use azure-storage>=0.30.0 try: _ = self.blob_service.MAX_BLOCK_SIZE self.blob_service.MAX_BLOCK_SIZE = globals.azure_max_block_size # fallback for azure-storage<0.30.0 except AttributeError: self.blob_service._BLOB_MAX_CHUNK_DATA_SIZE = globals.azure_max_block_size def _create_container(self): try: self.blob_service.create_container(self.container, fail_on_exist=True) except self.AzureConflictError: # Indicates that the resource could not be created because it already exists. pass except Exception as e: log.FatalError("Could not create Azure container: %s" % unicode(e.message).split('\n', 1)[0], log.ErrorCode.connection_failed) def _put(self, source_path, remote_filename): kwargs = {} if globals.azure_max_connections: kwargs['max_connections'] = globals.azure_max_connections # https://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#upload-a-blob-into-a-container try: self.blob_service.create_blob_from_path(self.container, remote_filename, source_path.name, **kwargs) except AttributeError: # Old versions use a different method name self.blob_service.put_block_blob_from_path(self.container, remote_filename, source_path.name, **kwargs) def _get(self, remote_filename, local_path): # https://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#download-blobs self.blob_service.get_blob_to_path(self.container, remote_filename, local_path.name) def _list(self): # https://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#list-the-blobs-in-a-container blobs = [] marker = None while True: batch = self.blob_service.list_blobs(self.container, marker=marker) blobs.extend(batch) if not batch.next_marker: break marker = batch.next_marker return [blob.name for blob in blobs] def _delete(self, filename): # http://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#delete-blobs self.blob_service.delete_blob(self.container, filename) def _query(self, filename): prop = self.blob_service.get_blob_properties(self.container, filename) try: info = {'size': int(prop.properties.content_length)} except AttributeError: # old versions directly returned the properties info = {'size': int(prop['content-length'])} return info def _error_code(self, operation, e): if isinstance(e, self.AzureMissingResourceError): return log.ErrorCode.backend_not_found
### bucket = 'crawl-data' in_progress = set() # existing = set([ x.name for x in db.list_blobs(bucket, 'common-crawl/crawl-data/CC-MAIN-2014-23/') ]) todo = [x.rstrip() for x in sys.stdin.readlines()] todo = [x for x in todo if x not in existing] ### while todo or in_progress: new_progress = set() for path in in_progress: props = db.get_blob_properties(bucket, path) if props['x-ms-copy-status'] == 'success': print '\t%s completed' % path continue new_progress.add(path) time.sleep(0.25) print 'Task queue length is %d' % len(new_progress) print 'TODO queue length is %d' % len(todo) # Populate the queue while todo and len(new_progress) < 256: path = todo.pop() # If it exists, skip it -- only add if it's missing try: resp = db.get_blob_properties(bucket, path) except WindowsAzureMissingResourceError: db.copy_blob(bucket, path,
#print "res:" #print res #print "size:" #print size #print name #/200605/20060529/KGRK/NWS_NEXRAD_NXL2LG_KGRK_20060529000000_20060529075959.tar parts = name.split('_') #print parts year = parts[4][0:4] #print year month = parts[4][4:6] #print month day = parts[4][6:8] #print day path = year + month + '/' + year + month + day + '/' + parts[ 3] + '/' + name #print path # check to see if this file is in azure, and that the file sizes match try: item = blob_service.get_blob_properties('nexradl2', path) #print item['content-length'] #print size if (int(item['content-length']) == int(size)): a = 1 #print "item matches" else: print "size,%s,%s,%s," % (size, item['content-length'], path) except WindowsAzureError as ex: #print "exception: %s" % ex print "missing,%s,0,%s," % (size, path)
class Command(BaseCommand): help = "Synchronizes static media to cloud files." option_list = BaseCommand.option_list + ( optparse.make_option('-w', '--wipe', action='store_true', dest='wipe', default=False, help="Wipes out entire contents of container first."), optparse.make_option('-t', '--test-run', action='store_true', dest='test_run', default=False, help="Performs a test run of the sync."), optparse.make_option('-c', '--container', dest='container', help="Override STATIC_CONTAINER."), ) # settings from azurite.settings ACCOUNT_NAME = AZURITE['ACCOUNT_NAME'] ACCOUNT_KEY = AZURITE['ACCOUNT_KEY'] STATIC_CONTAINER = AZURITE['STATIC_CONTAINER'] # paths DIRECTORY = os.path.abspath(settings.STATIC_ROOT) STATIC_URL = settings.STATIC_URL if not DIRECTORY.endswith('/'): DIRECTORY = DIRECTORY + '/' if STATIC_URL.startswith('/'): STATIC_URL = STATIC_URL[1:] local_object_names = [] create_count = 0 upload_count = 0 update_count = 0 skip_count = 0 delete_count = 0 service = None def handle(self, *args, **options): self.wipe = options.get('wipe') self.test_run = options.get('test_run') self.verbosity = int(options.get('verbosity')) if hasattr(options, 'container'): self.STATIC_CONTAINER = options.get('container') self.sync_files() def sync_files(self): self.service = BlobService(account_name=self.ACCOUNT_NAME, account_key=self.ACCOUNT_KEY) try: self.service.get_container_properties(self.STATIC_CONTAINER) except WindowsAzureMissingResourceError: self.service.create_container(self.STATIC_CONTAINER, x_ms_blob_public_access='blob') self.service.set_container_acl(self.STATIC_CONTAINER, x_ms_blob_public_access='blob') # if -w option is provided, wipe out the contents of the container if self.wipe: blob_count = len(self.service.list_blobs(self.STATIC_CONTAINER)) if self.test_run: print "Wipe would delete %d objects." % blob_count else: print "Deleting %d objects..." % blob_count for blob in self.service.list_blobs(self.STATIC_CONTAINER): self.service.delete_blob(self.STATIC_CONTAINER, blob.name) # walk through the directory, creating or updating files on the cloud os.path.walk(self.DIRECTORY, self.upload_files, "foo") # remove any files on remote that don't exist locally self.delete_files() # print out the final tally to the cmd line self.update_count = self.upload_count - self.create_count print if self.test_run: print "Test run complete with the following results:" print "Skipped %d. Created %d. Updated %d. Deleted %d." % ( self.skip_count, self.create_count, self.update_count, self.delete_count) def upload_files(self, arg, dirname, names): # upload or skip items for item in names: file_path = os.path.join(dirname, item) if os.path.isdir(file_path): continue # Don't try to upload directories object_name = self.STATIC_URL + file_path.split(self.DIRECTORY)[1] self.local_object_names.append(object_name) try: properties = self.service.get_blob_properties(self.STATIC_CONTAINER, object_name) except WindowsAzureMissingResourceError: properties = {} self.create_count += 1 cloud_datetime = None if 'last-modified' in properties: cloud_datetime = (properties['last-modified'] and datetime.datetime.strptime( properties['last-modified'], "%a, %d %b %Y %H:%M:%S %Z" ) or None) local_datetime = datetime.datetime.utcfromtimestamp( os.stat(file_path).st_mtime) if cloud_datetime and local_datetime < cloud_datetime: self.skip_count += 1 if self.verbosity > 1: print "Skipped %s: not modified." % object_name continue if not self.test_run: file_contents = open(file_path, 'r').read() content_type, encoding = mimetypes.guess_type(file_path) print "content-type", content_type print "encoding", encoding self.service.put_blob(self.STATIC_CONTAINER, object_name, file_contents, x_ms_blob_type='BlockBlob', x_ms_blob_content_type=content_type, content_encoding=encoding) # sync_headers(cloud_obj) self.upload_count += 1 if self.verbosity > 1: print "Uploaded", object_name def delete_files(self): # remove any objects in the container that don't exist locally for blob in self.service.list_blobs(self.STATIC_CONTAINER): if blob.name not in self.local_object_names: self.delete_count += 1 if self.verbosity > 1: print "Deleted %s" % blob.name if not self.test_run: self.service.delete_blob(self.STATIC_CONTAINER, blob.name)
class AzureBackend(duplicity.backend.Backend): """ Backend for Azure Blob Storage Service """ def __init__(self, parsed_url): duplicity.backend.Backend.__init__(self, parsed_url) # Import Microsoft Azure Storage SDK for Python library. try: import azure import azure.storage if hasattr(azure.storage, 'BlobService'): # v0.11.1 and below from azure.storage import BlobService self.AzureMissingResourceError = azure.WindowsAzureMissingResourceError self.AzureConflictError = azure.WindowsAzureConflictError else: # v1.0.0 and above from azure.storage.blob import BlobService self.AzureMissingResourceError = azure.common.AzureMissingResourceHttpError self.AzureConflictError = azure.common.AzureConflictHttpError except ImportError as e: raise BackendException("""\ Azure backend requires Microsoft Azure Storage SDK for Python (https://pypi.python.org/pypi/azure-storage/). Exception: %s""" % str(e)) if 'AZURE_ACCOUNT_NAME' not in os.environ: raise BackendException('AZURE_ACCOUNT_NAME environment variable not set.') if 'AZURE_ACCOUNT_KEY' not in os.environ: raise BackendException('AZURE_ACCOUNT_KEY environment variable not set.') self.blob_service = BlobService(account_name=os.environ['AZURE_ACCOUNT_NAME'], account_key=os.environ['AZURE_ACCOUNT_KEY']) # TODO: validate container name self.container = parsed_url.path.lstrip('/') try: self.blob_service.create_container(self.container, fail_on_exist=True) except self.AzureConflictError: # Indicates that the resource could not be created because it already exists. pass except Exception as e: log.FatalError("Could not create Azure container: %s" % unicode(e.message).split('\n', 1)[0], log.ErrorCode.connection_failed) def _put(self, source_path, remote_filename): # https://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#upload-a-blob-into-a-container self.blob_service.put_block_blob_from_path(self.container, remote_filename, source_path.name) def _get(self, remote_filename, local_path): # https://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#download-blobs self.blob_service.get_blob_to_path(self.container, remote_filename, local_path.name) def _list(self): # https://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#list-the-blobs-in-a-container blobs = [] marker = None while True: batch = self.blob_service.list_blobs(self.container, marker=marker) blobs.extend(batch) if not batch.next_marker: break marker = batch.next_marker return [blob.name for blob in blobs] def _delete(self, filename): # http://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#delete-blobs self.blob_service.delete_blob(self.container, filename) def _query(self, filename): prop = self.blob_service.get_blob_properties(self.container, filename) return {'size': int(prop['content-length'])} def _error_code(self, operation, e): if isinstance(e, self.AzureMissingResourceError): return log.ErrorCode.backend_not_found
#print com #print "res:" #print res #print "size:" #print size #print name #/200605/20060529/KGRK/NWS_NEXRAD_NXL2LG_KGRK_20060529000000_20060529075959.tar parts = name.split('_') #print parts year = parts[4][0:4] #print year month = parts[4][4:6] #print month day = parts[4][6:8] #print day path = year+month+'/'+year+month+day+'/'+parts[3]+'/'+name #print path # check to see if this file is in azure, and that the file sizes match try: item = blob_service.get_blob_properties('nexradl2',path) #print item['content-length'] #print size if (int(item['content-length']) == int(size)): a =1 #print "item matches" else: print "size,%s,%s,%s," % (size,item['content-length'],path) except WindowsAzureError as ex: #print "exception: %s" % ex print "missing,%s,0,%s," % (size,path)
class Storage(driver.Base): supports_bytes_range = True def __init__(self, path=None, config=None): self._config = config self._container = self._config.azure_storage_container protocol = 'https' if self._config.azure_use_https else 'http' acct_name = self._config.azure_storage_account_name acct_key = self._config.azure_storage_account_key self._blob = BlobService( account_name=acct_name, account_key=acct_key, protocol=protocol) self._init_container() logger.debug("Initialized azureblob storage driver") def _init_container(self): '''Initializes image container on Azure blob storage if the container does not exist. ''' created = self._blob.create_container( self._container, x_ms_blob_public_access='blob', fail_on_exist=False) if created: logger.info('Created blob container for image registry.') else: logger.debug('Registry container already exists.') return created @lru.get def get_content(self, path): try: return self._blob.get_blob(self._container, path) except azure.WindowsAzureMissingResourceError: raise exceptions.FileNotFoundError('%s is not there' % path) @lru.set def put_content(self, path, content): self._blob.put_blob(self._container, path, content, 'BlockBlob') return path def stream_read(self, path, bytes_range=None): try: f = io.BytesIO() self._blob.get_blob_to_file(self._container, path, f) if bytes_range: f.seek(bytes_range[0]) total_size = bytes_range[1] - bytes_range[0] + 1 else: f.seek(0) while True: buf = None if bytes_range: # Bytes Range is enabled buf_size = self.buffer_size if nb_bytes + buf_size > total_size: # We make sure we don't read out of the range buf_size = total_size - nb_bytes if buf_size > 0: buf = f.read(buf_size) nb_bytes += len(buf) else: # We're at the end of the range buf = '' else: buf = f.read(self.buffer_size) if not buf: break yield buf except IOError: raise exceptions.FileNotFoundError('%s is not there' % path) def stream_write(self, path, fp): self._blob.put_block_blob_from_file(self._container, path, fp) def list_directory(self, path=None): if not path.endswith('/'): path += '/' # path=a would list a/b.txt as well as 'abc.txt' blobs = list(self._blob.list_blobs(self._container, path)) if not blobs: raise exceptions.FileNotFoundError('%s is not there' % path) return [b.name for b in blobs] def exists(self, path): try: self._blob.get_blob_properties(self._container, path) return True except azure.WindowsAzureMissingResourceError: return False @lru.remove def remove(self, path): is_blob = self.exists(path) if is_blob: self._blob.delete_blob(self._container, path) return exists = False blobs = list(self._blob.list_blobs(self._container, path)) if not blobs: raise exceptions.FileNotFoundError('%s is not there' % path) for b in blobs: self._blob.delete_blob(self._container, b.name) def get_size(self, path): try: properties = self._blob.get_blob_properties(self._container, path) return int(properties['content-length']) # auto-converted to long except azure.WindowsAzureMissingResourceError: raise exceptions.FileNotFoundError('%s is not there' % path)
from CREDENTIALS import account_name, account_key db = BlobService(account_name=account_name, account_key=account_key) ### bucket = 'crawl-data' in_progress = set() # existing = set([x.name for x in db.list_blobs(bucket, 'common-crawl/crawl-data/CC-MAIN-2014-23/')]) todo = [x.rstrip() for x in sys.stdin.readlines()] todo = [x for x in todo if x not in existing] ### while todo or in_progress: new_progress = set() for path in in_progress: props = db.get_blob_properties(bucket, path) if props['x-ms-copy-status'] == 'success': print '\t%s completed' % path continue new_progress.add(path) time.sleep(0.25) print 'Task queue length is %d' % len(new_progress) print 'TODO queue length is %d' % len(todo) # Populate the queue while todo and len(new_progress) < 256: path = todo.pop() # If it exists, skip it -- only add if it's missing try: resp = db.get_blob_properties(bucket, path) except WindowsAzureMissingResourceError: db.copy_blob(bucket, path, 'https://aws-publicdatasets.s3.amazonaws.com/' + path)
class AzureBlobStorage(Storage): def __init__(self, account='nyxstorage', container='pxo'): self.base_storage_uri = 'http://%s.blob.core.windows.net/%s/' % ( account, container) self.blob_service = BlobService( account, get_env_variable('AZURE_BLOB_STORAGE_KEY')) self.container = container def _open(self, name, mode='rb'): data = self.blob_service.get_blob(self.container, name) return ContentFile(data) def _save(self, name, content): _file = content.read() file_name = content.name[-35:] self.blob_service.put_blob( self.container, file_name, _file, x_ms_blob_type='BlockBlob') return self.base_storage_uri + file_name def create_container(self, container_name): result = self.blob_service.create_container( container_name, x_ms_blob_public_access='container') return result def delete(self, name): self.blob_service.delete_blob(self.container, name) def exists(self, name): try: self.blob_service.get_blob_properties(self.container, name) except: return False else: return True def get_available_name(self, name): return name def get_blobs(self): blobs = self.blob_service.list_blobs(self.container) return blobs def get_valid_name(self, name): return name def modified_time(self, name): metadata = self.blob_service.get_blob_metadata(self.container, name) modified_time = float(metadata.get('x-ms-meta-modified_time')) return datetime.fromtimestamp(modified_time) def set_public_container(self, container_name): result = self.blob_service.set_container_acl( container_name, x_ms_blob_public_access='container') return result def size(self, name): properties = self.blob_service.get_blob_properties( self.container, name) return properties.get('content-length') def url(self, name): blob = self.blob_service.list_blobs(self.container, prefix=name) return blob.blobs[0].url
class AzureBackend(duplicity.backend.Backend): """ Backend for Azure Blob Storage Service """ def __init__(self, parsed_url): duplicity.backend.Backend.__init__(self, parsed_url) # Import Microsoft Azure SDK for Python library. try: import azure from azure.storage import BlobService except ImportError: raise BackendException('Azure backend requires Microsoft Azure SDK for Python ' '(https://github.com/Azure/azure-sdk-for-python).') if 'AZURE_ACCOUNT_NAME' not in os.environ: raise BackendException('AZURE_ACCOUNT_NAME environment variable not set.') if 'AZURE_ACCOUNT_KEY' not in os.environ: raise BackendException('AZURE_ACCOUNT_KEY environment variable not set.') account_name = os.environ['AZURE_ACCOUNT_NAME'] account_key = os.environ['AZURE_ACCOUNT_KEY'] self.WindowsAzureMissingResourceError = azure.WindowsAzureMissingResourceError self.blob_service = BlobService(account_name=account_name, account_key=account_key) # TODO: validate container name self.container = parsed_url.path.lstrip('/') try: self.blob_service.create_container(self.container, fail_on_exist=True) except azure.WindowsAzureConflictError: # Indicates that the resource could not be created because it already exists. pass except Exception as e: log.FatalError("Could not create Azure container: %s" % unicode(e.message).split('\n', 1)[0], log.ErrorCode.connection_failed) def _put(self, source_path, remote_filename): # http://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#upload-blob self.blob_service.put_block_blob_from_path(self.container, remote_filename, source_path.name) def _get(self, remote_filename, local_path): # http://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#download-blobs self.blob_service.get_blob_to_path(self.container, remote_filename, local_path.name) def _list(self): # http://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#list-blob blobs = self.blob_service.list_blobs(self.container) return [blob.name for blob in blobs] def _delete(self, filename): # http://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#delete-blobs self.blob_service.delete_blob(self.container, filename) def _query(self, filename): prop = self.blob_service.get_blob_properties(self.container, filename) return {'size': int(prop['content-length'])} def _error_code(self, operation, e): if isinstance(e, self.WindowsAzureMissingResourceError): return log.ErrorCode.backend_not_found
class AzureBlobStorage(Storage): ''' classdocs ''' def __init__(self, azure_profile): ''' Constructor ''' if not azure_profile: raise Exception() else: container_name = azure_profile['container_name'] account_name = azure_profile['account_name'] account_key = azure_profile['key'] base_url = azure_profile['base_url'] self.blob = BlobService( account_name=account_name, account_key=account_key) self.container = container_name self.base_url = base_url def delete(self, name): """ Delete file. """ try: self.blob.delete_blob(self.container, name) except WindowsAzureMissingResourceError: return False else: return True def delete_files(self, files=None): """ Delete files in container. """ if not files: files = self.listdir(self.container)[1] for _file in files: self.delete(_file) def exists(self, name, with_properties=False): """ Existing check. """ result = False blob_properties = None try: blob_properties = self.blob.get_blob_properties( self.container, name) except WindowsAzureMissingResourceError: result = False else: result = True if with_properties: return result, blob_properties else: return result def get_available_name(self, name): return super(AzureBlobStorage, self).get_available_name(name.replace('\\', '/')) def get_valid_name(self, name): return name def _list(self, path, prefix, maxresults): result = [] blobs = self.blob.list_blobs(path, prefix, maxresults) for _blob in blobs: result.append(_blob.name) return result def listdir(self, path=None, prefix=None, maxresults=None): """ Catalog file list. """ if not path: path = self.container return [], self._list(path, prefix, maxresults) def size(self, name): """ File size. """ result, properties = self.exists(name, with_properties=True) if result: return int(properties['content-length']) else: return 0 def url(self, name, chk_exist=False): """ URL for file downloading. """ if chk_exist: if self.exists(name): return '%s%s/%s' % (self.base_url, self.container, name) else: return None else: return '%s%s/%s' % (self.base_url, self.container, name) def _open(self, name, mode='rb'): """ Open file. """ in_mem_file = StringIO.StringIO( self.blob.get_blob(self.container, name)) in_mem_file.name = name in_mem_file.mode = mode return File(in_mem_file) def _save(self, name, blob_to_upload, x_ms_blob_type='BlockBlob', content_type=None): """ Save file. """ if hasattr(blob_to_upload, 'content_type'): content_type = blob_to_upload.content_type or None if content_type is None: content_type = mimetypes.guess_type(name)[0] or None blob_to_upload.seek(0) self.blob.put_blob(self.container, name, blob_to_upload, x_ms_blob_type, x_ms_blob_content_type=content_type) return name def modified_time(self, name): """ Last modification time. """ result, properties = self.exists(name, with_properties=True) if result: date_string = properties['last-modified'] modified_dt = parser.parse(date_string) if timezone.is_naive(modified_dt): return modified_dt else: return timezone.make_naive(modified_dt, timezone.get_current_timezone()) else: return None created_time = accessed_time = modified_time
AZURE_STORAGE_CONNECTION_STRING = os.environ['AZURE_STORAGE_CONNECTION_STRING'] blob_service = BlobService(connection_string=AZURE_STORAGE_CONNECTION_STRING) q = [] query = "SELECT * FROM files" if quick == True: query = "SELECT * FROM files where azure=''" for row in c.execute(query): print row # check to see if this file is in azure, and that the file sizes match try: item = blob_service.get_blob_properties('nexradl2',row[0]) print item['content-length'] print row[1] if (int(item['content-length']) == int(row[1])): print "item matches" q.append("update files set azure='yes' where path = '%s'" % (row[0])) else: q.append("update files set azure='' where path = '%s'" % (row[0])) except WindowsAzureError as ex: q.append("update files set azure='' where path = '%s'" % (row[0])) print "exception: %s" % ex #print item print q
class AzureBackend(duplicity.backend.Backend): u""" Backend for Azure Blob Storage Service """ def __init__(self, parsed_url): duplicity.backend.Backend.__init__(self, parsed_url) # Import Microsoft Azure Storage SDK for Python library. try: import azure import azure.storage if hasattr(azure.storage, u'BlobService'): # v0.11.1 and below from azure.storage import BlobService self.AzureMissingResourceError = azure.WindowsAzureMissingResourceError self.AzureConflictError = azure.WindowsAzureConflictError else: # v1.0.0 and above import azure.storage.blob if hasattr(azure.storage.blob, u'BlobService'): from azure.storage.blob import BlobService else: from azure.storage.blob.blockblobservice import BlockBlobService as BlobService self.AzureMissingResourceError = azure.common.AzureMissingResourceHttpError self.AzureConflictError = azure.common.AzureConflictHttpError except ImportError as e: raise BackendException(u"""\ Azure backend requires Microsoft Azure Storage SDK for Python (https://pypi.python.org/pypi/azure-storage/). Exception: %s""" % str(e)) # TODO: validate container name self.container = parsed_url.path.lstrip(u'/') if u'AZURE_ACCOUNT_NAME' not in os.environ: raise BackendException(u'AZURE_ACCOUNT_NAME environment variable not set.') if u'AZURE_ACCOUNT_KEY' in os.environ: if u'AZURE_ENDPOINT_SUFFIX' in os.environ: self.blob_service = BlobService(account_name=os.environ[u'AZURE_ACCOUNT_NAME'], account_key=os.environ[u'AZURE_ACCOUNT_KEY'], endpoint_suffix=os.environ[u'AZURE_ENDPOINT_SUFFIX']) else: self.blob_service = BlobService(account_name=os.environ[u'AZURE_ACCOUNT_NAME'], account_key=os.environ[u'AZURE_ACCOUNT_KEY']) self._create_container() elif u'AZURE_SHARED_ACCESS_SIGNATURE' in os.environ: if u'AZURE_ENDPOINT_SUFFIX' in os.environ: self.blob_service = BlobService(account_name=os.environ[u'AZURE_ACCOUNT_NAME'], sas_token=os.environ[u'AZURE_SHARED_ACCESS_SIGNATURE'], endpoint_suffix=os.environ[u'AZURE_ENDPOINT_SUFFIX']) else: self.blob_service = BlobService(account_name=os.environ[u'AZURE_ACCOUNT_NAME'], sas_token=os.environ[u'AZURE_SHARED_ACCESS_SIGNATURE']) else: raise BackendException( u'Neither AZURE_ACCOUNT_KEY nor AZURE_SHARED_ACCESS_SIGNATURE environment variable not set.') if globals.azure_max_single_put_size: # check if we use azure-storage>=0.30.0 try: _ = self.blob_service.MAX_SINGLE_PUT_SIZE self.blob_service.MAX_SINGLE_PUT_SIZE = globals.azure_max_single_put_size # fallback for azure-storage<0.30.0 except AttributeError: self.blob_service._BLOB_MAX_DATA_SIZE = globals.azure_max_single_put_size if globals.azure_max_block_size: # check if we use azure-storage>=0.30.0 try: _ = self.blob_service.MAX_BLOCK_SIZE self.blob_service.MAX_BLOCK_SIZE = globals.azure_max_block_size # fallback for azure-storage<0.30.0 except AttributeError: self.blob_service._BLOB_MAX_CHUNK_DATA_SIZE = globals.azure_max_block_size def _create_container(self): try: self.blob_service.create_container(self.container, fail_on_exist=True) except self.AzureConflictError: # Indicates that the resource could not be created because it already exists. pass except Exception as e: log.FatalError(u"Could not create Azure container: %s" % str(e.message).split(u'\n', 1)[0], log.ErrorCode.connection_failed) def _put(self, source_path, remote_filename): remote_filename = fsdecode(remote_filename) kwargs = {} if globals.azure_max_connections: kwargs[u'max_connections'] = globals.azure_max_connections # https://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#upload-a-blob-into-a-container try: self.blob_service.create_blob_from_path(self.container, remote_filename, source_path.name, **kwargs) except AttributeError: # Old versions use a different method name self.blob_service.put_block_blob_from_path(self.container, remote_filename, source_path.name, **kwargs) self._set_tier(remote_filename) def _set_tier(self, remote_filename): if globals.azure_blob_tier is not None: try: self.blob_service.set_standard_blob_tier(self.container, remote_filename, globals.azure_blob_tier) except AttributeError: # might not be available in old API pass def _get(self, remote_filename, local_path): # https://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#download-blobs self.blob_service.get_blob_to_path(self.container, fsdecode(remote_filename), local_path.name) def _list(self): # https://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#list-the-blobs-in-a-container blobs = [] marker = None while True: batch = self.blob_service.list_blobs(self.container, marker=marker) blobs.extend(batch) if not batch.next_marker: break marker = batch.next_marker return [blob.name for blob in blobs] def _delete(self, filename): # http://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#delete-blobs self.blob_service.delete_blob(self.container, fsdecode(filename)) def _query(self, filename): prop = self.blob_service.get_blob_properties(self.container, fsdecode(filename)) try: info = {u'size': int(prop.properties.content_length)} except AttributeError: # old versions directly returned the properties info = {u'size': int(prop[u'content-length'])} return info def _error_code(self, operation, e): if isinstance(e, self.AzureMissingResourceError): return log.ErrorCode.backend_not_found
class Command(BaseCommand): help = "Synchronizes static media to cloud files." option_list = BaseCommand.option_list + ( optparse.make_option( '-w', '--wipe', action='store_true', dest='wipe', default=False, help="Wipes out entire contents of container first."), optparse.make_option('-t', '--test-run', action='store_true', dest='test_run', default=False, help="Performs a test run of the sync."), optparse.make_option('-c', '--container', dest='container', help="Override STATIC_CONTAINER."), ) # settings from azurite.settings ACCOUNT_NAME = AZURITE['ACCOUNT_NAME'] ACCOUNT_KEY = AZURITE['ACCOUNT_KEY'] STATIC_CONTAINER = AZURITE['STATIC_CONTAINER'] # paths DIRECTORY = os.path.abspath(settings.STATIC_ROOT) STATIC_URL = settings.STATIC_URL if not DIRECTORY.endswith('/'): DIRECTORY = DIRECTORY + '/' if STATIC_URL.startswith('/'): STATIC_URL = STATIC_URL[1:] local_object_names = [] create_count = 0 upload_count = 0 update_count = 0 skip_count = 0 delete_count = 0 service = None def handle(self, *args, **options): self.wipe = options.get('wipe') self.test_run = options.get('test_run') self.verbosity = int(options.get('verbosity')) if hasattr(options, 'container'): self.STATIC_CONTAINER = options.get('container') self.sync_files() def sync_files(self): self.service = BlobService(account_name=self.ACCOUNT_NAME, account_key=self.ACCOUNT_KEY) try: self.service.get_container_properties(self.STATIC_CONTAINER) except WindowsAzureMissingResourceError: self.service.create_container(self.STATIC_CONTAINER, x_ms_blob_public_access='blob') self.service.set_container_acl(self.STATIC_CONTAINER, x_ms_blob_public_access='blob') # if -w option is provided, wipe out the contents of the container if self.wipe: blob_count = len(self.service.list_blobs(self.STATIC_CONTAINER)) if self.test_run: print "Wipe would delete %d objects." % blob_count else: print "Deleting %d objects..." % blob_count for blob in self.service.list_blobs(self.STATIC_CONTAINER): self.service.delete_blob(self.STATIC_CONTAINER, blob.name) # walk through the directory, creating or updating files on the cloud os.path.walk(self.DIRECTORY, self.upload_files, "foo") # remove any files on remote that don't exist locally self.delete_files() # print out the final tally to the cmd line self.update_count = self.upload_count - self.create_count print if self.test_run: print "Test run complete with the following results:" print "Skipped %d. Created %d. Updated %d. Deleted %d." % ( self.skip_count, self.create_count, self.update_count, self.delete_count) def upload_files(self, arg, dirname, names): # upload or skip items for item in names: file_path = os.path.join(dirname, item) if os.path.isdir(file_path): continue # Don't try to upload directories object_name = self.STATIC_URL + file_path.split(self.DIRECTORY)[1] self.local_object_names.append(object_name) try: properties = self.service.get_blob_properties( self.STATIC_CONTAINER, object_name) except WindowsAzureMissingResourceError: properties = {} self.create_count += 1 cloud_datetime = None if 'last-modified' in properties: cloud_datetime = ( properties['last-modified'] and datetime.datetime.strptime(properties['last-modified'], "%a, %d %b %Y %H:%M:%S %Z") or None) local_datetime = datetime.datetime.utcfromtimestamp( os.stat(file_path).st_mtime) if cloud_datetime and local_datetime < cloud_datetime: self.skip_count += 1 if self.verbosity > 1: print "Skipped %s: not modified." % object_name continue if not self.test_run: file_contents = open(file_path, 'r').read() content_type, encoding = mimetypes.guess_type(file_path) self.service.put_blob(self.STATIC_CONTAINER, object_name, file_contents, x_ms_blob_type='BlockBlob', x_ms_blob_content_type=content_type, content_encoding=encoding) # sync_headers(cloud_obj) self.upload_count += 1 if self.verbosity > 1: print "Uploaded", object_name def delete_files(self): # remove any objects in the container that don't exist locally for blob in self.service.list_blobs(self.STATIC_CONTAINER): if blob.name not in self.local_object_names: self.delete_count += 1 if self.verbosity > 1: print "Deleted %s" % blob.name if not self.test_run: self.service.delete_blob(self.STATIC_CONTAINER, blob.name)