def resizeimageandputinazure(strkey, url):
    maxwidthandheight = 150
    resize = False
    bytes = urllib2.urlopen(url).read()
    img = Image.open(io.BytesIO(bytes))
    newwidth = img.width
    newheight = img.height
    if (newheight > newwidth and newheight > maxwidthandheight):
        heightpercent = maxwidthandheight / float(newheight)
        newheight = maxwidthandheight
        newwidth = int((float(img.width) * float(heightpercent)))
        resize = True
    elif (newwidth > newheight and newwidth > maxwidthandheight):
        widthpercent = maxwidthandheight / float(newwidth)
        newwidth = maxwidthandheight
        newheight = int((float(img.height) * float(widthpercent)))
        resize = True
    if resize:
        newimg = img.resize((newwidth, newheight), Image.ANTIALIAS)
        newimg.format = img.format
        newio = io.BytesIO()
        newimg.save(newio, 'JPEG')
        bytes = newio.getvalue()
    blob_service = BlobService(account_name='wanderight',
                               account_key='gdmZeJOCx3HYlFPZZukUhHAfeGAu4cfHWGQZc3+HIpkBHjlznUDjhXMl5HWh5MgbjpJF09ZxRaET1JVF9S2MWQ==')
    blob_service.put_block_blob_from_bytes(config['container'], 'images/' + strkey, bytes,
                                           x_ms_blob_content_type='image/jpg',
                                           x_ms_meta_name_values={'url': url})
def download_azure_blob(account_name, account_key, file_uri, download_dir):
    (blob_name, container_name) = parse_blob_uri(file_uri)
    host_base = get_host_base_from_uri(file_uri)
    download_path = os.path.join(download_dir, blob_name)
    blob_service = BlobService(account_name, account_key, host_base=host_base)

    max_retry = 3
    for retry in range(1, max_retry + 1):
        try:
            blob_service.get_blob_to_path(container_name, blob_name, download_path)
        except Exception:
            hutil.error('Failed to download Azure blob, retry = ' + str(retry) + ', max_retry = ' + str(max_retry))
            if retry != max_retry:
                hutil.log('Sleep 10 seconds')
                time.sleep(10)
            else:
                waagent.AddExtensionEvent(name=ExtensionShortName,
                                          op=Operation.Download,
                                          isSuccess=False,
                                          message="(03303)Failed to download file from Azure Storage")
                raise Exception('Failed to download azure blob: ' + blob_name)
    waagent.AddExtensionEvent(name=ExtensionShortName,
                              op=Operation.Download,
                              isSuccess=True,
                              message="(03301)Succeeded to download file from Azure Storage")
    return download_path
def store(image, entity, entity_id):
    blob_service = BlobService(account_name='shnergledata',
                               account_key=os.environ['BLOB_KEY'])
    myblob = image.read()
    name = '/' + entity + '/' + entity_id
    blob_service.put_blob('images', name, myblob, x_ms_blob_type='BlockBlob')
    return True
def make_blob_sas_url(account_name,
                      account_key,
                      container_name,
                      blob_name,
                      permission='w',
                      duration=16):
    """
    Generate a Blob SAS URL to allow a client to upload a file.

    account_name: Storage account name.
    account_key: Storage account key.
    container_name: Storage container.
    blob_name: Blob name.
    duration: Duration, in minutes, until SAS expiration. The SAS start date
        is utcnow() minus five minutes; the expiry date is the start date
        plus duration.

    Returns the SAS URL.
    """
    sas = SharedAccessSignature(account_name, account_key)
    resource_path = '%s/%s' % (container_name, blob_name)
    date_format = "%Y-%m-%dT%H:%M:%SZ"
    start = datetime.datetime.utcnow() - datetime.timedelta(minutes=5)
    expiry = start + datetime.timedelta(minutes=duration)
    sap = SharedAccessPolicy(AccessPolicy(
        start.strftime(date_format),
        expiry.strftime(date_format),
        permission))
    sas_token = sas.generate_signed_query_string(resource_path, 'b', sap)

    blob_url = BlobService(account_name, account_key)
    url = blob_url.make_blob_url(container_name=container_name,
                                 blob_name=blob_name,
                                 sas_token=sas_token)
    return url
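# Usage sketch for make_blob_sas_url (illustrative only): the account name, key,
# container, and blob names below are hypothetical placeholders. A client that
# receives the signed URL can upload directly with a single HTTP PUT, as long as
# the x-ms-blob-type header is set on the request.
import requests

upload_url = make_blob_sas_url('myaccount', 'my-account-key',
                               'uploads', 'report.csv',
                               permission='w', duration=30)
with open('report.csv', 'rb') as source:
    response = requests.put(upload_url,
                            data=source,
                            headers={'x-ms-blob-type': 'BlockBlob'})
response.raise_for_status()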
def download_azure_blob(account_name, account_key, file_uri, download_dir):
    waagent.AddExtensionEvent(name=ExtensionShortName,
                              op="EnableInProgress",
                              isSuccess=True,
                              message="Downloading from azure blob")
    try:
        (blob_name, container_name) = parse_blob_uri(file_uri)
        host_base = get_host_base_from_uri(file_uri)
        download_path = os.path.join(download_dir, blob_name)
        blob_service = BlobService(account_name, account_key, host_base=host_base)
    except Exception as e:
        waagent.AddExtensionEvent(name=ExtensionShortName,
                                  op='DownloadInProgress',
                                  isSuccess=True,
                                  message='Enable failed with the azure storage error : {0}, stack trace: {1}'.format(str(e), traceback.format_exc()))
        hutil.error('Failed to enable the extension with error: %s, stack trace: %s' % (str(e), traceback.format_exc()))
        hutil.do_exit(1, 'Enable', 'error', '1', 'Enable failed: {0}'.format(e))

    max_retry = 3
    for retry in range(1, max_retry + 1):
        try:
            blob_service.get_blob_to_path(container_name, blob_name, download_path)
        except Exception:
            hutil.error('Failed to download Azure blob, retry = ' + str(retry) + ', max_retry = ' + str(max_retry))
            if retry != max_retry:
                hutil.log('Sleep 10 seconds')
                time.sleep(10)
            else:
                waagent.AddExtensionEvent(name=ExtensionShortName,
                                          op=Operation.Download,
                                          isSuccess=False,
                                          message="(03303)Failed to download file from Azure Storage")
                raise Exception('Failed to download azure blob: ' + blob_name)
    waagent.AddExtensionEvent(name=ExtensionShortName,
                              op=Operation.Download,
                              isSuccess=True,
                              message="(03301)Succeeded to download file from Azure Storage")
    return download_path
def delete(self):
    properties = self.__SMS.get_deployment_by_name(self.name, self.name)
    media_link = properties.role_list.roles[0].os_virtual_hard_disk.media_link
    storage_name = media_link[media_link.find("//") + 2:media_link.find(".blob")]

    from Azure.AzureVolumes.AzureVolumes import AzureVolumescls
    volume_service = AzureVolumescls(credentials=self._credentials)
    volumes = volume_service.list_volumes()
    volume_to_be_deleted = None
    for volume in volumes:
        if volume.instance_id == self.name:
            volume_to_be_deleted = volume
            break

    self.__SMS.delete_deployment(self.name, self.name)
    self.__SMS.delete_hosted_service(self.name)
    volume_to_be_deleted.delete()

    # delete the image from storage
    from azure.storage import BlobService
    keys = self.__SMS.get_storage_account_keys(storage_name)
    blob_service = BlobService(account_name=storage_name,
                               account_key=keys.storage_service_keys.primary)
    blob_service.delete_container(self.name, fail_not_exist=True)
def deprovision(instance_id):
    """
    Deprovision an existing instance of this service.

    DELETE /v2/service_instances/<instance_id>:
        <instance_id> is the Cloud Controller provided value used to
        provision the instance.

    return: As of API 2.3, an empty JSON document is expected.
    """
    global subscription_id
    global cert
    global account_name
    global account_key

    if account_name and account_key:
        blob_service = BlobService(account_name, account_key)
        container_name = '{0}-{1}'.format(CONTAINER_NAME_PREFIX, instance_id)
        blob_service.delete_container(container_name)
        if account_name.startswith(STORAGE_ACCOUNT_NAME_PREFIX):
            sms = ServiceManagementService(subscription_id, cert_file)
            sms.delete_storage_account(account_name)
    return jsonify({})
def get_image(album_name, image_name, username):
    gallery_db = connect_to_db()
    albums = gallery_db.albums
    requested_album = albums.find_one({"name": album_name})
    if not requested_album:
        return redirect(url_for('static', filename='image_not_found.gif'))
    if not (username in requested_album["write"] or username in requested_album["read"]):
        return redirect(url_for('static', filename='image_not_found.gif'))
    if image_name not in requested_album["images"]:
        return redirect(url_for('static', filename='image_not_found.gif'))
    try:
        stats_download_timer = stats_client.timer("download timer")
        # start to time the download
        stats_download_timer.start()
        blob_service = BlobService(account_name=ACCOUNT_NAME, account_key=ACCOUNT_KEY)
        data = blob_service.get_blob_to_bytes(CONTAINER_NAME, image_name)
        response = make_response(data)
        response.headers["Content-Disposition"] = "filename=%s.jpg" % image_name
        response.headers['Content-type'] = 'image/jpeg'
        stats_download_timer.stop()
        stats_client.incr("images downloaded", 1)
        return response
    except Exception as ex:
        # TODO: different image in this case?
        stats_download_timer.stop()
        return redirect(url_for('static', filename='image_not_found.gif'))
def submit():
    blob_service = BlobService(account_name=ACCOUNT_NAME, account_key=ACCOUNT_KEY)

    # Get a SAS signature (read for 24 hours) for the input container, save to a string
    inputsig = sasUrl(account=ACCOUNT_NAME, key=ACCOUNT_KEY, container=INPUT_CONTAINER, permission='r')

    # Get a SAS signature (write for 24 hours) for the output container, save to a string
    outputsig = sasUrl(account=ACCOUNT_NAME, key=ACCOUNT_KEY, container=OUTPUT_CONTAINER, permission='rwl')

    # List all the blobs and dump the content to a string
    blobs = blob_service.list_blobs(INPUT_CONTAINER)
    bloblist = []
    for blob in blobs:
        bloblist.append(blob.name)

    os.environ[SLURMDEMO_INPUTSIG] = inputsig
    os.environ[SLURMDEMO_OUTPUTSIG] = outputsig
    os.environ[SLURMDEMO_BLOBLIST] = json.dumps(bloblist)
    os.environ[SLURMDEMO_INPUTCONTAINER] = INPUT_CONTAINER
    os.environ[SLURMDEMO_OUTPUTCONTAINER] = OUTPUT_CONTAINER
    os.environ[SLURMDEMO_INPUTACCOUNT] = ACCOUNT_NAME
    os.environ[SLURMDEMO_OUTPUTACCOUNT] = ACCOUNT_NAME

    # Call sbatch
    cli = "sbatch --array=0-{nb} slurmdemo.sh".format(nb=len(bloblist))
    run(cli, showoutput=True)
def handle_noargs(self, **options):
    try:
        blob_service = BlobService(AZURE_ACCOUNT_NAME, AZURE_ACCOUNT_KEY)
        mixes = Mix.objects.filter(archive_updated=False)
        c = len(mixes)
        i = 1
        for mix in mixes:
            try:
                blob_name = "%s.%s" % (mix.uid, mix.filetype)
                blob = blob_service.get_blob(AZURE_CONTAINER, blob_name)
                if blob:
                    download_name = smart_str('Deep South Sounds - %s.%s' % (mix.title, mix.filetype))
                    blob_service.set_blob_properties(
                        AZURE_CONTAINER,
                        blob_name,
                        x_ms_blob_content_type='application/octet-stream',
                        x_ms_blob_content_disposition='attachment;filename="%s"' % (download_name)
                    )
                    print "Processed: %s (%d of %d)" % (mix.uid, i, c)
                    i = i + 1
                    mix.archive_updated = True
                    mix.save()
                else:
                    print "No blob found for: %s" % mix.uid
            except WindowsAzureMissingResourceError:
                print "No blob found for: %s" % mix.uid
            except Exception, ex:
                print "Error processing blob %s: %s" % (mix.uid, ex.message)
    except Exception, ex:
        print "Fatal error, bailing. %s" % (ex.message)
def remove_image(album_name, username):
    gallery_db = connect_to_db()
    albums = gallery_db.albums
    requested_album = albums.find_one({"name": album_name})
    if not requested_album:
        return redirect(url_for('albums', album=album_name, message="album not found"))
    if not username in requested_album["write"]:
        return redirect(url_for('albums', album=album_name, message="permission denied"))
    image = request.form.get('image', '')
    if not image:
        return redirect(url_for('albums', album=album_name, message="no image was chosen for removal"))
    blob_service = BlobService(account_name=ACCOUNT_NAME, account_key=ACCOUNT_KEY)
    try:
        blob_service.delete_blob(CONTAINER_NAME, image)
    except WindowsAzureMissingResourceError:
        # Even if the file is not in the blob storage, we want to remove it from the album
        pass
    gallery_db.albums.update({'name': album_name}, {'$pull': {'images': image}})
    # increment the counter of the removed images
    stats_client.incr("images removed", 1)
    return redirect(url_for('albums', album=album_name))
def add_image(album_name, username):
    gallery_db = connect_to_db()
    albums = gallery_db.albums
    requested_album = albums.find_one({"name": album_name})
    if not requested_album:
        return redirect(url_for('albums', album=album_name, message="album not found"))
    if not username in requested_album["write"]:
        return redirect(url_for('albums', album=album_name, message="permission denied"))
    if 'image[]' not in request.files:
        return redirect(url_for('albums', album=album_name, message="no file uploaded"))
    for req_file in request.files.getlist('image[]'):
        file_name = uuid.uuid4().hex
        stats_upload_timer = stats_client.timer("upload timer")
        stats_upload_timer.start()
        blob_service = BlobService(account_name=ACCOUNT_NAME, account_key=ACCOUNT_KEY)
        blob_service.put_block_blob_from_file(CONTAINER_NAME, file_name, req_file.stream)
        gallery_db.albums.update({'name': album_name}, {'$push': {'images': file_name}})
        stats_upload_timer.stop()
    # increment the counter of the uploaded images
    stats_client.incr("images uploaded", len(request.files.getlist('image[]')))
    return redirect(url_for('albums', album=album_name))
def upload_documents():
    data = request.json.get('data', None)
    if not data:
        return jsonify(status=400, message='No file content passed')
    data = data.decode("base64")
    upload_handler = get_upload_handler()

    # force is a flag that signals to upload the current file even if it was uploaded before
    force = request.json.get('force', None)
    if force is None or force.lower() != "true":
        if upload_handler.is_file_already_uploaded(data, current_user.get_id()):
            return jsonify(status=400,
                           message='File content was already uploaded. Force upload by adding the force boolean')

    blob_service = BlobService(account_name=BLOB_ACCOUNT_NAME, account_key=BLOB_ACCOUNT_KEY)
    filename = uuid.uuid4().hex
    # put the data in the container using a random filename
    blob_service.put_block_blob_from_bytes(BLOB_CONTAINER_NAME, filename, data)

    task_collection = get_db().task_collection
    # update the task db with the new task (which is parsing the new data file)
    task_id = upload_handler.update_uploaded_file(filename, data, current_user.get_id())
    return jsonify(status=200, message='Task created successfully', task_id=task_id)
def test_azure_call(request):
    import os
    try:
        from azure.storage import BlobService
        bs = BlobService(os.environ["AZURE_STORAGE_ACCOUNT"], os.environ["AZURE_STORAGE_ACCESS_KEY"])
        import random
        container_name = hex(int(random.random() * 1000000000))
        bs.create_container(container_name)
        bs.put_blob(container_name, 'testblob', 'hello world\n', 'BlockBlob')
        blob = bs.get_blob(container_name, 'testblob')
        if blob != 'hello world\n':
            return HttpResponse("Failed!", status='404')
        bs.delete_blob(container_name, 'testblob')
        bs.delete_container(container_name)
        return HttpResponse("Succeeded!")
    except:
        try:
            import traceback
            return HttpResponse(traceback.format_exc() + str(os.environ.keys()))
        except:
            import traceback
            return HttpResponse(traceback.format_exc())
class AzureTransfer(BaseTransfer):
    def __init__(self, account_name, account_key, container_name):
        BaseTransfer.__init__(self)
        self.account_name = account_name
        self.account_key = account_key
        self.container_name = container_name
        self.conn = BlobService(account_name=self.account_name, account_key=self.account_key)
        self.container = self.get_or_create_container(self.container_name)
        self.log.debug("AzureTransfer initialized")

    def get_metadata_for_key(self, key):
        key = fix_path(key)
        return self.list_path(key)[0]['metadata']

    def list_path(self, path):
        return_list = []
        path = fix_path(path)
        self.log.info("Asking for listing of: %r", path)
        for r in self.conn.list_blobs(self.container_name, prefix=path,
                                      delimiter="/", include="metadata"):
            entry = {"name": r.name, "size": r.properties.content_length,
                     "last_modified": dateutil.parser.parse(r.properties.last_modified),
                     "metadata": r.metadata}
            return_list.append(entry)
        return return_list

    def delete_key(self, key_name):
        key_name = fix_path(key_name)
        self.log.debug("Deleting key: %r", key_name)
        return self.conn.delete_blob(self.container_name, key_name)

    def get_contents_to_file(self, obj_key, filepath_to_store_to):
        obj_key = fix_path(obj_key)
        self.log.debug("Starting to fetch the contents of: %r to: %r", obj_key, filepath_to_store_to)
        return self.conn.get_blob_to_path(self.container_name, obj_key, filepath_to_store_to)

    def get_contents_to_string(self, obj_key):
        obj_key = fix_path(obj_key)
        self.log.debug("Starting to fetch the contents of: %r", obj_key)
        return self.conn.get_blob_to_bytes(self.container_name, obj_key), self.get_metadata_for_key(obj_key)

    def store_file_from_memory(self, key, memstring, metadata=None):
        # For whatever reason Azure requires all values to be strings at the point of sending
        metadata_to_send = dict((str(k), str(v)) for k, v in metadata.items())
        self.conn.put_block_blob_from_bytes(self.container_name, key, memstring,
                                            x_ms_meta_name_values=metadata_to_send)

    def store_file_from_disk(self, key, filepath, metadata=None):
        # For whatever reason Azure requires all values to be strings at the point of sending
        metadata_to_send = dict((str(k), str(v)) for k, v in metadata.items())
        self.conn.put_block_blob_from_path(self.container_name, key, filepath,
                                           x_ms_meta_name_values=metadata_to_send)

    def get_or_create_container(self, container_name):
        start_time = time.time()
        self.conn.create_container(container_name)
        self.log.debug("Got/Created container: %r successfully, took: %.3fs",
                       container_name, time.time() - start_time)
        return container_name
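# Hypothetical usage sketch for AzureTransfer: the account name, key, container
# name, blob key, and metadata below are placeholders, and BaseTransfer/fix_path
# are assumed to come from the surrounding module. Metadata values are stringified
# by the class before upload.
transfer = AzureTransfer(account_name='myaccount',
                         account_key='my-account-key',
                         container_name='backups')
transfer.store_file_from_memory('basebackup/chunk.0', b'payload',
                                metadata={'compression': 'lzma'})
for entry in transfer.list_path('basebackup/'):
    print(entry['name'], entry['size'])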
def main():
    service = BlobService(
        credentials.getStorageServicesName(),
        credentials.getStorageServicesKey(),
    )
    service.create_container(CONTAINER_NAME)

    process(service, LOCAL_BLOCK_BLOB_FILES, CONNECTION_COUNTS, is_page_blob=False)
    process(service, LOCAL_PAGE_BLOB_FILES, CONNECTION_COUNTS, is_page_blob=True)
def download_file(container, path, dest):
    blob_service = BlobService(account_name=storage_name, account_key=storage_key)
    loop = 0
    while True:
        try:
            blob_service.get_blob_to_path(container, path, dest)
            break
        except azure.http.HTTPError as e:
            loop = loop + 1
            if loop >= 3:
                return
def upload_chunck(buf, path, storagename, container, key):
    blob_service = BlobService(account_name=storagename, account_key=key)
    loop = 0
    while True:
        try:
            blob_service.put_block_blob_from_bytes(container, path, buf)
            break
        except (azure.http.HTTPError, TimeoutError) as e:
            loop = loop + 1
            if loop >= 3:
                raise e
def download_chunck(path, storagename, container, key):
    blob_service = BlobService(account_name=storagename, account_key=key)
    loop = 0
    while True:
        try:
            return blob_service.get_blob_to_bytes(container, path)
        except (azure.http.HTTPError, TimeoutError) as e:
            loop = loop + 1
            if loop >= 3:
                raise e
def upload_log():
    blob_service = BlobService(account_name=os.getenv('ACC_NAME'),
                               account_key=os.getenv('ACCESS_KEY'))
    fpath = os.path.join(os.getenv('LOGS_DIR'), "log.log")
    blob_service.put_block_blob_from_path(
        'log',
        "log.log",
        fpath,
        x_ms_blob_content_type="text/plain"
    )
def download_blob(storage_account_name, storage_account_key, blob_uri, seqNo, command):
    container_name = get_container_name_from_uri(blob_uri)
    blob_name = get_blob_name_from_uri(blob_uri)
    download_dir = get_download_directory(seqNo)
    download_path = os.path.join(download_dir, blob_name)
    # The guest agent already ensures plugins are enabled one after another,
    # so the blob download will not conflict.
    blob_service = BlobService(storage_account_name, storage_account_key)
    try:
        blob_service.get_blob_to_path(container_name, blob_name, download_path)
    except Exception, e:
        hutil.error("Failed to download blob with uri: " + blob_uri + " with error: " + str(e))
        raise
def create_blob(blob, txt):
    uri = blob.uri
    host_base = cs.get_host_base_from_uri(uri)
    service = BlobService(blob.name, blob.key, host_base=host_base)
    container_name = cs.get_container_name_from_uri(uri)
    blob_name = cs.get_blob_name_from_uri(uri)
    service.put_block_blob_from_text(container_name, blob_name, txt)
def list_files_from_path(container, path):
    blob_service = BlobService(account_name=storage_name, account_key=storage_key)
    next_marker = None
    results = []
    while True:
        blobs = blob_service.list_blobs(container, prefix=path, maxresults=2000, marker=next_marker)
        for blob in blobs:
            results.append(blob.name)
        next_marker = blobs.next_marker
        if not next_marker:
            break
    return results
def main(argv):
    options = _parse_options(argv)

    blob_service = BlobService(options[_opt_azure_acc_name], options[_opt_azure_acc_key])
    _print_container_names(blob_service)

    blob_service.create_container(options[_opt_azure_container_name])
    _print_blobs(blob_service, options[_opt_azure_container_name])

    export_dir = _export_repo(options)
    export_zip = _zip_export(export_dir)
    _upload_zip(blob_service, options[_opt_azure_container_name], export_zip)
def delete_video(id):
    vs_operator = videostoreoperator.VideoStoreOperator()
    video_detils = vs_operator.get(id)
    vs_operator.delete(id)

    count = len(video_detils.chuncks)
    for i in range(count):
        chk = video_detils.chuncks[i]
        blob_service = BlobService(account_name=chk.storagename, account_key=chk.key)
        try:
            blob_service.delete_blob(chk.container, chk.path)
        except azure.WindowsAzureMissingResourceError:
            pass
def connect(config=False):
    # Connect to the cloud service.
    if not config:
        config = misc.config['_private']

    from azure.storage import BlobService
    container = 'streams'

    if not 'azure' in config:
        return None, None

    blob_service = BlobService(config['azure']['storage_account_name'],
                               config['azure']['primary_access_key'])
    blob_service.create_container(container, x_ms_blob_public_access='container')
    return blob_service, container
def _deleteBlob(self, storageAccount, mediaLink):
    primary = self._getStorageAccountKey(storageAccount)
    # Create the BlobService object
    blobService = BlobService(storageAccount, primary)
    (container, blob) = self._getBlobFromMediaLink(blobService, mediaLink)

    rs = blobService.delete_blob(container_name=container.name, blob_name=blob.name)
    try:
        updatedBlob = blobService.get_blob_properties(container_name=container.name, blob_name=blob.name)
    except WindowsAzureMissingResourceError as e:
        return True
    return False
def create_container(storage_account_name, container_name, storage_keys):
    """
    Creates a blob container in the specified Microsoft Azure Storage account.
    A container is like a folder within a storage account.
    :param storage_account_name:
    :param container_name:
    :param storage_keys:
    :return:
    """
    logging.info('Creating Container \'{0}\' in Storage account {1}'.format(container_name, storage_account_name))
    blob_svc = BlobService(account_name=storage_account_name,
                           account_key=storage_keys.storage_service_keys.primary)
    blob_svc.create_container(container_name)
    logging.info('Creating Container \'{0}\' in Storage account {1} complete'.format(container_name, storage_account_name))
def run(itk_source_dir, externaldata_object_store, account_name, account_key):
    blob_service = BlobService(account_name=account_name, account_key=account_key)
    blobs = blob_service.list_blobs('md5')
    current_blobs = [blob.name for blob in blobs]

    md5files = []
    for root, dirnames, filenames in os.walk(itk_source_dir):
        for filename in fnmatch.filter(filenames, '*.md5'):
            md5files.append(os.path.join(root, filename))

    for content_link in md5files:
        upload_to_azure(content_link, externaldata_object_store, blob_service, current_blobs)
def _ensureStorageContainersExist(self):
    """
    Creates Blob storage containers required by the service.
    """
    logger.info("Checking for existence of Blob containers.")
    account_name = self.config.getServiceStorageAccountName()
    account_key = self._getStorageAccountKey(account_name)
    blob_service = BlobService(account_name, account_key)
    name_and_access_list = [(self.config.getServicePublicStorageContainer(), 'blob'),
                            (self.config.getServiceBundleStorageContainer(), None)]
    for name, access in name_and_access_list:
        logger.info("Checking for existence of Blob container %s.", name)
        blob_service.create_container(name, x_ms_blob_public_access=access, fail_on_exist=False)
        access_info = 'private' if access is None else 'public {0}'.format(access)
        logger.info("Blob container %s is ready (access: %s).", name, access_info)
blob_analysis = 'analysis'
imagesQueue = 'imagesqueue'
tableName = 'photos'
tablePartitionKey = 'allphotos'

# Get queue credentials
# accountName = environ["AZURE_STORAGE_ACCOUNT"]
with open("ASA.key", "r") as myfile:
    accountName = myfile.read().replace('\n', '')

# accountKey = environ["AZURE_STORAGE_ACCESS_KEY"]
with open("ASK.key", "r") as myfile:
    accountKey = myfile.read().replace('\n', '')

# Create blob service
blob_service = BlobService(account_name=accountName, account_key=accountKey)
blob_service.create_container(blob_container)
blob_service.create_container(blob_analysis)

# Open queue with given credentials
queue_service = QueueService(account_name=accountName, account_key=accountKey)

# Open table service
table_service = TableService(account_name=accountName, account_key=accountKey)

# Analysis results
results = None

# Regions for analysis
region = 4

# Repeat
f = 'C:/Users/' + comp_user + '/Simulations/AzureUsers.pickle'

if os.path.exists(f):
    users = AzureUserPool.AzureUserPool()
    users.user_list = pickle.load(file(f))
else:
    users = AzureUserPool.AzureUserPool()

# Create service management object
subscription_id = 'a9401417-cb08-4e67-bc2a-613f49b46f8a'
certificate_path = 'CURRENT_USER\\my\\AzureCertificate'
sms = ServiceManagementService(subscription_id, certificate_path)

# Create blob service object
blob_service = BlobService(
    account_name='portalvhdsd3d1018q65tg3',
    account_key='cAT5jbypcHrN7sbW/CHgGFDGSvOpyhw6VE/yHubS799egkHfvPeeXuK7uzc6H2C8ZU1ALiyOFEZkjzWuSyfc+A=='
)

# Test the service management object
try:
    sms.get_subscription()
except:
    stderr.write(
        "An error occurred while connecting to Azure Service Management. Please, check your service "
        "management certificate.")
    exit(1)

# Check for command line arguments
UI = True
class AzureStorageBlockDeviceAPI(object):
    """
    An ``IBlockDeviceAsyncAPI`` which uses Azure Storage Backed Block Devices
    Current Support: Azure SMS API
    """

    def __init__(self, **azure_config):
        """
        :param ServiceManagement azure_client: an instance of the azure
            service management api client.
        :param String service_name: The name of the cloud service
        :param names of Azure volumes to identify cluster
        :returns: A ``BlockDeviceVolume``.
        """
        self._instance_id = self.compute_instance_id()
        self._azure_service_client = ServiceManagementService(
            azure_config['subscription_id'],
            azure_config['management_certificate_path'])
        self._service_name = azure_config['service_name']
        self._azure_storage_client = BlobService(
            azure_config['storage_account_name'],
            azure_config['storage_account_key'])
        self._storage_account_name = azure_config['storage_account_name']
        self._disk_container_name = azure_config['disk_container_name']

        if azure_config['debug']:
            to_file(sys.stdout)

    def allocation_unit(self):
        """
        1GiB is the minimum allocation unit for azure disks
        return int: 1 GiB
        """
        return int(GiB(1).to_Byte().value)

    def compute_instance_id(self):
        """
        Azure Stored a UUID in the SDC kernel module.
        """
        # Node host names should be unique within a vnet
        return unicode(socket.gethostname())

    def create_volume(self, dataset_id, size):
        """
        Create a new volume.
        :param UUID dataset_id: The Flocker dataset ID of the dataset on this
            volume.
        :param int size: The size of the new volume in bytes.
        :returns: A ``Deferred`` that fires with a ``BlockDeviceVolume`` when
            the volume has been created.
        """
        size_in_gb = Byte(size).to_GiB().value

        if size_in_gb % 1 != 0:
            raise UnsupportedVolumeSize(dataset_id)

        self._create_volume_blob(size, dataset_id)

        label = self._disk_label_for_dataset_id(str(dataset_id))
        return BlockDeviceVolume(
            blockdevice_id=unicode(label),
            size=size,
            attached_to=None,
            dataset_id=self._dataset_id_for_disk_label(label))

    def destroy_volume(self, blockdevice_id):
        """
        Destroy an existing volume.
        :param unicode blockdevice_id: The unique identifier for the volume to
            destroy.
        :raises UnknownVolume: If the supplied ``blockdevice_id`` does not
            exist.
        :return: ``None``
        """
        log_info('Destroying block device: ' + str(blockdevice_id))

        (target_disk, role_name, lun) = \
            self._get_disk_vmname_lun(blockdevice_id)

        if target_disk is None:
            raise UnknownVolume(blockdevice_id)

        request = None

        if lun is not None:
            request = \
                self._azure_service_client.delete_data_disk(
                    service_name=self._service_name,
                    deployment_name=self._service_name,
                    role_name=target_disk.attached_to.role_name,
                    lun=lun,
                    delete_vhd=True)
        else:
            if target_disk.__class__.__name__ == 'Blob':
                # unregistered disk
                self._azure_storage_client.delete_blob(
                    self._disk_container_name, target_disk.name)
            else:
                request = self._azure_service_client.delete_disk(
                    target_disk.name, True)

        if request is not None:
            self._wait_for_async(request.request_id, 5000)
            self._wait_for_detach(blockdevice_id)

    def attach_volume(self, blockdevice_id, attach_to):
        """
        Attach ``blockdevice_id`` to ``host``.
        :param unicode blockdevice_id: The unique identifier for the block
            device being attached.
        :param unicode attach_to: An identifier like the one returned by the
            ``compute_instance_id`` method indicating the node to which to
            attach the volume.
        :raises UnknownVolume: If the supplied ``blockdevice_id`` does not
            exist.
        :raises AlreadyAttachedVolume: If the supplied ``blockdevice_id`` is
            already attached.
        :returns: A ``BlockDeviceVolume`` with a ``host`` attribute set to
            ``host``.
        """
        (target_disk, role_name, lun) = \
            self._get_disk_vmname_lun(blockdevice_id)

        if target_disk is None:
            raise UnknownVolume(blockdevice_id)

        if lun is not None:
            raise AlreadyAttachedVolume(blockdevice_id)

        log_info('Attempting to attach ' + str(blockdevice_id)
                 + ' to ' + str(attach_to))

        disk_size = self._attach_disk(blockdevice_id, target_disk, attach_to)

        self._wait_for_attach(blockdevice_id)

        log_info('disk attached')

        return self._blockdevicevolume_from_azure_volume(
            blockdevice_id, disk_size, attach_to)

    def detach_volume(self, blockdevice_id):
        """
        Detach ``blockdevice_id`` from whatever host it is attached to.
        :param unicode blockdevice_id: The unique identifier for the block
            device being detached.
        :raises UnknownVolume: If the supplied ``blockdevice_id`` does not
            exist.
        :raises UnattachedVolume: If the supplied ``blockdevice_id`` is not
            attached to anything.
        :returns: ``None``
        """
        (target_disk, role_name, lun) = \
            self._get_disk_vmname_lun(blockdevice_id)

        if target_disk is None:
            raise UnknownVolume(blockdevice_id)

        if lun is None:
            raise UnattachedVolume(blockdevice_id)

        # contrary to function name it doesn't delete by default, just detaches
        request = \
            self._azure_service_client.delete_data_disk(
                service_name=self._service_name,
                deployment_name=self._service_name,
                role_name=role_name,
                lun=lun)

        self._wait_for_async(request.request_id, 5000)

        self._wait_for_detach(blockdevice_id)

    def get_device_path(self, blockdevice_id):
        """
        Return the device path that has been allocated to the block device on
        the host to which it is currently attached.
        :param unicode blockdevice_id: The unique identifier for the block
            device.
        :raises UnknownVolume: If the supplied ``blockdevice_id`` does not
            exist.
        :raises UnattachedVolume: If the supplied ``blockdevice_id`` is
            not attached to a host.
        :returns: A ``FilePath`` for the device.
        """
        (target_disk_or_blob, role_name, lun) = \
            self._get_disk_vmname_lun(blockdevice_id)

        if target_disk_or_blob is None:
            raise UnknownVolume(blockdevice_id)

        if lun is None:
            raise UnattachedVolume(blockdevice_id)

        return Lun.get_device_path_for_lun(lun)

    def list_volumes(self):
        """
        List all the block devices available via the back end API.
        :returns: A ``list`` of ``BlockDeviceVolume``s.
        """
        media_url_prefix = 'https://' + self._storage_account_name \
            + '.blob.core.windows.net/' + self._disk_container_name
        disks = self._azure_service_client.list_disks()
        disk_list = []
        all_blobs = self._get_flocker_blobs()
        for d in disks:
            if media_url_prefix not in d.media_link or \
                    'flocker-' not in d.label:
                continue
            role_name = None
            if d.attached_to is not None \
                    and d.attached_to.role_name is not None:
                role_name = d.attached_to.role_name

            disk_list.append(self._blockdevicevolume_from_azure_volume(
                d.label,
                self._gibytes_to_bytes(d.logical_disk_size_in_gb),
                role_name))

            if d.label in all_blobs:
                del all_blobs[d.label]

        for key in all_blobs:
            # include unregistered 'disk' blobs
            disk_list.append(self._blockdevicevolume_from_azure_volume(
                all_blobs[key].name,
                all_blobs[key].properties.content_length,
                None))

        return disk_list

    def _attach_disk(self, blockdevice_id, target_disk, attach_to):
        """
        Attaches disk to specified VM
        :param string blockdevice_id: The identifier of the disk
        :param DataVirtualHardDisk/Blob target_disk: The Blob or Disk to be
            attached
        :returns int: The size of the attached disk
        """
        lun = Lun.compute_next_lun(
            self._azure_service_client,
            self._service_name,
            str(attach_to))
        common_params = {
            'service_name': self._service_name,
            'deployment_name': self._service_name,
            'role_name': attach_to,
            'lun': lun
        }
        disk_size = None

        if target_disk.__class__.__name__ == 'Blob':
            # exclude 512 byte footer
            disk_size = target_disk.properties.content_length

            common_params['source_media_link'] = \
                'https://' + self._storage_account_name \
                + '.blob.core.windows.net/' + self._disk_container_name \
                + '/' + blockdevice_id

            common_params['disk_label'] = blockdevice_id
        else:
            disk_size = self._gibytes_to_bytes(
                target_disk.logical_disk_size_in_gb)

            common_params['disk_name'] = target_disk.name

        request = self._azure_service_client.add_data_disk(**common_params)
        self._wait_for_async(request.request_id, 5000)

        return disk_size

    def _create_volume_blob(self, size, dataset_id):
        # Create a new page blob as a blank disk
        self._azure_storage_client.put_blob(
            container_name=self._disk_container_name,
            blob_name=self._disk_label_for_dataset_id(dataset_id),
            blob=None,
            x_ms_blob_type='PageBlob',
            x_ms_blob_content_type='application/octet-stream',
            x_ms_blob_content_length=size)

        # for the disk to be a valid vhd it requires a vhd footer
        # on the last 512 bytes
        vhd_footer = Vhd.generate_vhd_footer(size)

        self._azure_storage_client.put_page(
            container_name=self._disk_container_name,
            blob_name=self._disk_label_for_dataset_id(dataset_id),
            page=vhd_footer,
            x_ms_page_write='update',
            x_ms_range='bytes=' + str((size - 512)) + '-' + str(size - 1))

    def _disk_label_for_dataset_id(self, dataset_id):
        """
        Returns a disk label for a given Dataset ID
        :param unicode dataset_id: The identifier of the dataset
        :returns string: A string representing the disk label
        """
        label = 'flocker-' + str(dataset_id)
        return label

    def _dataset_id_for_disk_label(self, disk_label):
        """
        Returns a UUID representing the Dataset ID for the given disk label
        :param string disk_label: The disk label
        :returns UUID: The UUID of the dataset
        """
        return UUID(disk_label.replace('flocker-', ''))

    def _get_disk_vmname_lun(self, blockdevice_id):
        target_disk = None
        target_lun = None
        role_name = None
        disk_list = self._azure_service_client.list_disks()

        for d in disk_list:
            if 'flocker-' not in d.label:
                continue
            if d.label == str(blockdevice_id):
                target_disk = d
                break

        if target_disk is None:
            # check for unregistered disk
            blobs = self._get_flocker_blobs()
            blob = None

            if str(blockdevice_id) in blobs:
                blob = blobs[str(blockdevice_id)]

            return blob, None, None

        vm_info = None

        if hasattr(target_disk.attached_to, 'role_name'):
            vm_info = self._azure_service_client.get_role(
                self._service_name, self._service_name,
                target_disk.attached_to.role_name)

            for d in vm_info.data_virtual_hard_disks:
                if d.disk_name == target_disk.name:
                    target_lun = d.lun
                    break

            role_name = target_disk.attached_to.role_name

        return (target_disk, role_name, target_lun)

    def _get_flocker_blobs(self):
        all_blobs = {}

        blobs = self._azure_storage_client.list_blobs(
            self._disk_container_name,
            prefix='flocker-')

        for b in blobs:
            # todo - this could be big!
            all_blobs[b.name] = b

        return all_blobs

    def _wait_for_detach(self, blockdevice_id):
        role_name = ''
        lun = -1

        timeout_count = 0

        log_info('waiting for azure to report disk as detached...')

        while role_name is not None or lun is not None:
            (target_disk, role_name, lun) = \
                self._get_disk_vmname_lun(blockdevice_id)
            time.sleep(1)
            timeout_count += 1

            if timeout_count > 5000:
                raise AsynchronousTimeout()

        log_info('Disk Detached')

    def _wait_for_attach(self, blockdevice_id):
        timeout_count = 0
        lun = None

        log_info('waiting for azure to report disk as attached...')

        while lun is None:
            (target_disk, role_name, lun) = \
                self._get_disk_vmname_lun(blockdevice_id)
            time.sleep(.001)
            timeout_count += 1

            if timeout_count > 5000:
                raise AsynchronousTimeout()

    def _wait_for_async(self, request_id, timeout):
        count = 0
        result = self._azure_service_client.get_operation_status(request_id)
        while result.status == 'InProgress':
            count = count + 1
            if count > timeout:
                log_error('Timed out waiting for async operation to complete.')
                raise AsynchronousTimeout()
            time.sleep(.001)
            log_info('.')
            result = self._azure_service_client.get_operation_status(
                request_id)
            if result.error:
                log_error(result.error.code)
                log_error(str(result.error.message))
        log_error(result.status + ' in ' + str(count * 5) + 's')

    def _gibytes_to_bytes(self, size):
        return int(GiB(size).to_Byte().value)

    def _blockdevicevolume_from_azure_volume(self, label, size,
                                             attached_to_name):
        # azure will report the disk size excluding the 512 byte footer
        # however flocker expects the exact value it requested for disk size
        # so offset the reported size to flocker by 512 bytes
        return BlockDeviceVolume(
            blockdevice_id=unicode(label),
            size=int(size),
            attached_to=attached_to_name,
            dataset_id=self._dataset_id_for_disk_label(label))
        # disk labels are formatted as flocker-<data_set_id>
class Command(BaseCommand):
    help = "Synchronizes static media to cloud files."

    option_list = BaseCommand.option_list + (
        optparse.make_option('-w', '--wipe',
                             action='store_true', dest='wipe', default=False,
                             help="Wipes out entire contents of container first."),
        optparse.make_option('-t', '--test-run',
                             action='store_true', dest='test_run', default=False,
                             help="Performs a test run of the sync."),
        optparse.make_option('-c', '--container',
                             dest='container',
                             help="Override STATIC_CONTAINER."),
    )

    # settings from azurite.settings
    ACCOUNT_NAME = AZURITE['ACCOUNT_NAME']
    ACCOUNT_KEY = AZURITE['ACCOUNT_KEY']
    STATIC_CONTAINER = AZURITE['STATIC_CONTAINER']

    # paths
    DIRECTORY = os.path.abspath(settings.STATIC_ROOT)
    STATIC_URL = settings.STATIC_URL

    if not DIRECTORY.endswith('/'):
        DIRECTORY = DIRECTORY + '/'

    if STATIC_URL.startswith('/'):
        STATIC_URL = STATIC_URL[1:]

    local_object_names = []
    create_count = 0
    upload_count = 0
    update_count = 0
    skip_count = 0
    delete_count = 0
    service = None

    def handle(self, *args, **options):
        self.wipe = options.get('wipe')
        self.test_run = options.get('test_run')
        self.verbosity = int(options.get('verbosity'))
        if hasattr(options, 'container'):
            self.STATIC_CONTAINER = options.get('container')
        self.sync_files()

    def sync_files(self):
        self.service = BlobService(account_name=self.ACCOUNT_NAME,
                                   account_key=self.ACCOUNT_KEY)

        try:
            self.service.get_container_properties(self.STATIC_CONTAINER)
        except WindowsAzureMissingResourceError:
            self.service.create_container(self.STATIC_CONTAINER,
                                          x_ms_blob_public_access='blob')

        self.service.set_container_acl(self.STATIC_CONTAINER,
                                       x_ms_blob_public_access='blob')

        # if -w option is provided, wipe out the contents of the container
        if self.wipe:
            blob_count = len(self.service.list_blobs(self.STATIC_CONTAINER))

            if self.test_run:
                print "Wipe would delete %d objects." % blob_count
            else:
                print "Deleting %d objects..." % blob_count
                for blob in self.service.list_blobs(self.STATIC_CONTAINER):
                    self.service.delete_blob(self.STATIC_CONTAINER, blob.name)

        # walk through the directory, creating or updating files on the cloud
        os.path.walk(self.DIRECTORY, self.upload_files, "foo")

        # remove any files on remote that don't exist locally
        self.delete_files()

        # print out the final tally to the cmd line
        self.update_count = self.upload_count - self.create_count
        print
        if self.test_run:
            print "Test run complete with the following results:"
        print "Skipped %d. Created %d. Updated %d. Deleted %d." % (
            self.skip_count, self.create_count, self.update_count,
            self.delete_count)

    def upload_files(self, arg, dirname, names):
        # upload or skip items
        for item in names:
            file_path = os.path.join(dirname, item)
            if os.path.isdir(file_path):
                continue  # Don't try to upload directories

            object_name = self.STATIC_URL + file_path.split(self.DIRECTORY)[1]
            self.local_object_names.append(object_name)

            try:
                properties = self.service.get_blob_properties(
                    self.STATIC_CONTAINER, object_name)
            except WindowsAzureMissingResourceError:
                properties = {}
                self.create_count += 1

            cloud_datetime = None
            if 'last-modified' in properties:
                cloud_datetime = (properties['last-modified'] and
                                  datetime.datetime.strptime(
                                      properties['last-modified'],
                                      "%a, %d %b %Y %H:%M:%S %Z") or
                                  None)

            local_datetime = datetime.datetime.utcfromtimestamp(
                os.stat(file_path).st_mtime)

            if cloud_datetime and local_datetime < cloud_datetime:
                self.skip_count += 1
                if self.verbosity > 1:
                    print "Skipped %s: not modified." % object_name
                continue

            if not self.test_run:
                file_contents = open(file_path, 'r').read()
                content_type, encoding = mimetypes.guess_type(file_path)
                self.service.put_blob(self.STATIC_CONTAINER, object_name,
                                      file_contents,
                                      x_ms_blob_type='BlockBlob',
                                      x_ms_blob_content_type=content_type,
                                      content_encoding=encoding)
                # sync_headers(cloud_obj)
            self.upload_count += 1
            if self.verbosity > 1:
                print "Uploaded", object_name

    def delete_files(self):
        # remove any objects in the container that don't exist locally
        for blob in self.service.list_blobs(self.STATIC_CONTAINER):
            if blob.name not in self.local_object_names:
                self.delete_count += 1
                if self.verbosity > 1:
                    print "Deleted %s" % blob.name
                if not self.test_run:
                    self.service.delete_blob(self.STATIC_CONTAINER, blob.name)
class AzureBackend(duplicity.backend.Backend):
    """
    Backend for Azure Blob Storage Service
    """
    def __init__(self, parsed_url):
        duplicity.backend.Backend.__init__(self, parsed_url)

        # Import Microsoft Azure Storage SDK for Python library.
        try:
            import azure
            import azure.storage
            if hasattr(azure.storage, 'BlobService'):
                # v0.11.1 and below
                from azure.storage import BlobService
                self.AzureMissingResourceError = azure.WindowsAzureMissingResourceError
                self.AzureConflictError = azure.WindowsAzureConflictError
            else:
                # v1.0.0 and above
                import azure.storage.blob
                if hasattr(azure.storage.blob, 'BlobService'):
                    from azure.storage.blob import BlobService
                else:
                    from azure.storage.blob.blockblobservice import BlockBlobService as BlobService
                self.AzureMissingResourceError = azure.common.AzureMissingResourceHttpError
                self.AzureConflictError = azure.common.AzureConflictHttpError
        except ImportError as e:
            raise BackendException("""\
Azure backend requires Microsoft Azure Storage SDK for Python (https://pypi.python.org/pypi/azure-storage/).
Exception: %s""" % str(e))

        # TODO: validate container name
        self.container = parsed_url.path.lstrip('/')

        if 'AZURE_ACCOUNT_NAME' not in os.environ:
            raise BackendException('AZURE_ACCOUNT_NAME environment variable not set.')

        if 'AZURE_ACCOUNT_KEY' in os.environ:
            if 'AZURE_ENDPOINT_SUFFIX' in os.environ:
                self.blob_service = BlobService(account_name=os.environ['AZURE_ACCOUNT_NAME'],
                                                account_key=os.environ['AZURE_ACCOUNT_KEY'],
                                                endpoint_suffix=os.environ['AZURE_ENDPOINT_SUFFIX'])
            else:
                self.blob_service = BlobService(account_name=os.environ['AZURE_ACCOUNT_NAME'],
                                                account_key=os.environ['AZURE_ACCOUNT_KEY'])
            self._create_container()
        elif 'AZURE_SHARED_ACCESS_SIGNATURE' in os.environ:
            if 'AZURE_ENDPOINT_SUFFIX' in os.environ:
                self.blob_service = BlobService(account_name=os.environ['AZURE_ACCOUNT_NAME'],
                                                sas_token=os.environ['AZURE_SHARED_ACCESS_SIGNATURE'],
                                                endpoint_suffix=os.environ['AZURE_ENDPOINT_SUFFIX'])
            else:
                self.blob_service = BlobService(account_name=os.environ['AZURE_ACCOUNT_NAME'],
                                                sas_token=os.environ['AZURE_SHARED_ACCESS_SIGNATURE'])
        else:
            raise BackendException(
                'Neither AZURE_ACCOUNT_KEY nor AZURE_SHARED_ACCESS_SIGNATURE environment variable is set.')

        if globals.azure_max_single_put_size:
            # check if we use azure-storage>=0.30.0
            try:
                _ = self.blob_service.MAX_SINGLE_PUT_SIZE
                self.blob_service.MAX_SINGLE_PUT_SIZE = globals.azure_max_single_put_size
            # fallback for azure-storage<0.30.0
            except AttributeError:
                self.blob_service._BLOB_MAX_DATA_SIZE = globals.azure_max_single_put_size

        if globals.azure_max_block_size:
            # check if we use azure-storage>=0.30.0
            try:
                _ = self.blob_service.MAX_BLOCK_SIZE
                self.blob_service.MAX_BLOCK_SIZE = globals.azure_max_block_size
            # fallback for azure-storage<0.30.0
            except AttributeError:
                self.blob_service._BLOB_MAX_CHUNK_DATA_SIZE = globals.azure_max_block_size

    def _create_container(self):
        try:
            self.blob_service.create_container(self.container, fail_on_exist=True)
        except self.AzureConflictError:
            # Indicates that the resource could not be created because it already exists.
            pass
        except Exception as e:
            log.FatalError("Could not create Azure container: %s"
                           % unicode(e.message).split('\n', 1)[0],
                           log.ErrorCode.connection_failed)

    def _put(self, source_path, remote_filename):
        kwargs = {}
        if globals.azure_max_connections:
            kwargs['max_connections'] = globals.azure_max_connections

        # https://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#upload-a-blob-into-a-container
        try:
            self.blob_service.create_blob_from_path(self.container, remote_filename, source_path.name, **kwargs)
        except AttributeError:  # Old versions use a different method name
            self.blob_service.put_block_blob_from_path(self.container, remote_filename, source_path.name, **kwargs)

    def _get(self, remote_filename, local_path):
        # https://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#download-blobs
        self.blob_service.get_blob_to_path(self.container, remote_filename, local_path.name)

    def _list(self):
        # https://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#list-the-blobs-in-a-container
        blobs = []
        marker = None
        while True:
            batch = self.blob_service.list_blobs(self.container, marker=marker)
            blobs.extend(batch)
            if not batch.next_marker:
                break
            marker = batch.next_marker
        return [blob.name for blob in blobs]

    def _delete(self, filename):
        # http://azure.microsoft.com/en-us/documentation/articles/storage-python-how-to-use-blob-storage/#delete-blobs
        self.blob_service.delete_blob(self.container, filename)

    def _query(self, filename):
        prop = self.blob_service.get_blob_properties(self.container, filename)
        try:
            info = {'size': int(prop.properties.content_length)}
        except AttributeError:
            # old versions directly returned the properties
            info = {'size': int(prop['content-length'])}
        return info

    def _error_code(self, operation, e):
        if isinstance(e, self.AzureMissingResourceError):
            return log.ErrorCode.backend_not_found
from azure.storage import BlobService
from util import echo

try:
    import simplejson as json
except:
    import json

from dateutil import datestampToEpoch, datestampToDatestring, datestringToEpoch, datestringToDatestamp

myaccount = "karmadigstorage"
mykey = "TJbdTjRymbBHXLsDtF/Nx3+6WXWN0uwh3RG/8GPJQRQyqg+rkOzioczm5czPtr+auGFhNeBx8GTAfuCufRyw8A=="

mycontainer = 'arch'
mycontainer = 'istr-memex-small-ads'
mycontainer = 'memex-small'

bs = BlobService(account_name=myaccount, account_key=mykey)


def publishAzureBlob(tbl='backpage_incoming',
                     source="backpage",
                     limit=2,
                     user='******',
                     password='******',
                     # host='karma-dig-db.cloudapp.net',
                     dbhost='karma-dig-db.cloudapp.net',
                     database='memex_small',
                     maxAttempts=3):
    cnx = mysql.connector.connect(user=user, password=password, host=dbhost, database=database)
    cursor = cnx.cursor()
from azure.storage import BlobService
from azure import WindowsAzureError
import os
import sys

#
# sys.argv[1]: inventory.csv
#
if len(sys.argv) > 1:
    inventory_name = sys.argv[1]

AZURE_STORAGE_CONNECTION_STRING = os.environ['AZURE_STORAGE_CONNECTION_STRING']
blob_service = BlobService(connection_string=AZURE_STORAGE_CONNECTION_STRING)

with open(inventory_name) as f:
    for line in f:
        com = line.strip().split(',')
        name = com[0]
        if name == "#FILENAME":
            continue
        size = com[1]
        # print "com:"
        # print com
        # print "res:"
        # print res
        # print "size:"
        # print size
        # print name
        # /200605/20060529/KGRK/NWS_NEXRAD_NXL2LG_KGRK_20060529000000_20060529075959.tar
def uri_put_file(creds, uri, fp, content_encoding=None):
    assert fp.tell() == 0
    assert uri.startswith('wabs://')

    def log_upload_failures_on_error(exc_tup, exc_processor_cxt):
        def standard_detail_message(prefix=''):
            return (prefix + ' There have been {n} attempts to upload '
                    'file {url} so far.'.format(n=exc_processor_cxt, url=uri))
        typ, value, tb = exc_tup
        del exc_tup

        # Screen for certain kinds of known-errors to retry from
        if issubclass(typ, socket.error):
            socketmsg = value[1] if isinstance(value, tuple) else value

            logger.info(
                msg='Retrying upload because of a socket error',
                detail=standard_detail_message(
                    "The socket error's message is '{0}'.".format(socketmsg)))
        else:
            # For all otherwise untreated exceptions, report them as a
            # warning and retry anyway -- all exceptions that can be
            # justified should be treated and have error messages
            # listed.
            logger.warning(
                msg='retrying file upload from unexpected exception',
                detail=standard_detail_message(
                    'The exception type is {etype} and its value is '
                    '{evalue} and its traceback is {etraceback}'.format(
                        etype=typ, evalue=value,
                        etraceback=''.join(traceback.format_tb(tb)))))

        # Help Python GC by resolving possible cycles
        del tb

    # Because we're uploading in chunks, catch rate limiting and
    # connection errors which occur for each individual chunk instead of
    # failing the whole file and restarting.
    @retry(retry_with_count(log_upload_failures_on_error))
    def upload_chunk(chunk, block_id):
        check_sum = base64.encodestring(md5(chunk).digest()).strip('\n')
        conn.put_block(url_tup.netloc, url_tup.path, chunk, block_id,
                       content_md5=check_sum)

    url_tup = urlparse(uri)
    kwargs = dict(x_ms_blob_type='BlockBlob')
    if content_encoding is not None:
        kwargs['x_ms_blob_content_encoding'] = content_encoding

    conn = BlobService(creds.account_name, creds.account_key, protocol='https')
    conn.put_blob(url_tup.netloc, url_tup.path, '', **kwargs)

    # WABS requires large files to be uploaded in 4MB chunks
    block_ids = []
    length, index = 0, 0
    pool_size = os.getenv('WABS_UPLOAD_POOL_SIZE', 5)
    p = gevent.pool.Pool(size=pool_size)
    while True:
        data = fp.read(WABS_CHUNK_SIZE)
        if data:
            length += len(data)
            block_id = base64.b64encode(str(index))
            p.wait_available()
            p.spawn(upload_chunk, data, block_id)
            block_ids.append(block_id)
            index += 1
        else:
            p.join()
            break

    conn.put_block_list(url_tup.netloc, url_tup.path, block_ids)

    # To maintain consistency with the S3 version of this function we must
    # return an object with a certain set of attributes. Currently, that set
    # of attributes consists of only 'size'
    return _Key(size=len(data))
def do_lzop_get(creds, url, path, decrypt):
    """
    Get and decompress a WABS URL

    This streams the content directly to lzop; the compressed version
    is never stored on disk.
    """
    assert url.endswith('.lzo'), 'Expect an lzop-compressed file'
    assert url.startswith('wabs://')

    conn = BlobService(creds.account_name, creds.account_key, protocol='https')

    def log_wal_fetch_failures_on_error(exc_tup, exc_processor_cxt):
        def standard_detail_message(prefix=''):
            return (prefix + ' There have been {n} attempts to fetch wal '
                    'file {url} so far.'.format(n=exc_processor_cxt, url=url))
        typ, value, tb = exc_tup
        del exc_tup

        # Screen for certain kinds of known-errors to retry from
        if issubclass(typ, socket.error):
            socketmsg = value[1] if isinstance(value, tuple) else value

            logger.info(
                msg='Retrying fetch because of a socket error',
                detail=standard_detail_message(
                    "The socket error's message is '{0}'.".format(socketmsg)))
        else:
            # For all otherwise untreated exceptions, report them as a
            # warning and retry anyway -- all exceptions that can be
            # justified should be treated and have error messages
            # listed.
            logger.warning(
                msg='retrying WAL file fetch from unexpected exception',
                detail=standard_detail_message(
                    'The exception type is {etype} and its value is '
                    '{evalue} and its traceback is {etraceback}'.format(
                        etype=typ, evalue=value,
                        etraceback=''.join(traceback.format_tb(tb)))))

        # Help Python GC by resolving possible cycles
        del tb

    @retry(retry_with_count(log_wal_fetch_failures_on_error))
    def download():
        with open(path, 'wb') as decomp_out:
            pipeline = get_download_pipeline(PIPE, decomp_out, decrypt)
            g = gevent.spawn(write_and_return_error, url, conn, pipeline.stdin)

            try:
                # Raise any exceptions from write_and_return_error
                g.get()
            except WindowsAzureMissingResourceError:
                # Short circuit any re-try attempts under certain race
                # conditions.
                logger.warn(
                    msg=('could no longer locate object while performing '
                         'wal restore'),
                    detail=('The URI at {url} no longer exists.'.format(url=url)),
                    hint=('This can be normal when Postgres is trying to '
                          'detect what timelines are available during '
                          'restoration.'))
                return False

            pipeline.finish()

            logger.info(
                msg='completed download and decompression',
                detail='Downloaded and decompressed "{url}" to "{path}"'
                .format(url=url, path=path))
        return True

    return download()
def __init__(self, name, key, container):
    self.container = container
    self.blobstore = BlobService(name, key)
    self.blobstore.create_container(self.container)
def generate_and_upload(gauge_factory, config, logger):
    start = datetime.datetime.now()
    twitter_followers = gauge_factory('twitter.followers')
    twitter_tweets = gauge_factory('twitter.tweets')
    fb_friends = gauge_factory('facebook.friends')
    foursq_checkins = gauge_factory('foursquare.checkins')
    klout_score = gauge_factory('klout.score')
    runkeeper_activities = gauge_factory('runkeeper.activities')
    runkeeper_calories = gauge_factory('runkeeper.calories_burned')
    runkeeper_weight = gauge_factory('runkeeper.weight')
    tmp102_celsius = gauge_factory('tmp102.temperature', gauge_type='hourly')
    lastfm_listened = gauge_factory('lastfm.listened')
    jawbone_sleeps = gauge_factory('jawbone.sleeps')
    jawbone_steps = gauge_factory('jawbone.steps')
    jawbone_caffeine = gauge_factory('jawbone.caffeine')

    data = {}
    data_sources = [
        # (output key, gauge, days back, aggregator, postprocessors)
        ('twitter.followers', twitter_followers, 30, None,
         [zero_fill_daily, interpolators.linear]),
        ('twitter.tweets', twitter_tweets, 20, None, [zero_fill_daily]),
        ('facebook.friends', fb_friends, 180, monthly_max, None),
        ('foursquare.checkins', foursq_checkins, 14, None, [zero_fill_daily]),
        ('lastfm.listened', lastfm_listened, 14, None, [zero_fill_daily]),
        ('klout.score', klout_score, 30, weekly_max,
         [zero_fill_weekly, interpolators.linear]),
        ('runkeeper.calories', runkeeper_calories, 60, weekly_sum,
         [zero_fill_weekly]),
        ('runkeeper.activities', runkeeper_activities, 60, weekly_sum,
         [zero_fill_weekly]),
        ('runkeeper.weight', runkeeper_weight, 180, weekly_min,
         [zero_fill_weekly, interpolators.linear]),
        ('sleeps', jawbone_sleeps, 14, None,
         [zero_fill_daily, interpolators.linear]),
        ('steps', jawbone_steps, 14, None,
         [zero_fill_daily, interpolators.linear]),
        ('caffeine', jawbone_caffeine, 30, None, [zero_fill_daily]),
        ('tmp102.temperature', tmp102_celsius, 2.5, None, None)
    ]

    for ds in data_sources:
        data[ds[0]] = ds[1].aggregate(today_utc() - timedelta(days=ds[2]),
                                      aggregator=ds[3],
                                      post_processors=ds[4])

    report = {
        'generated': str(now_utc()),
        'data': data,
        'took': (datetime.datetime.now() - start).seconds
    }
    report_json = json.dumps(report, indent=4, default=json_date_serializer)
    report_content = '{0}({1})'.format(JSONP_CALLBACK_NAME, report_json)

    blob_service = BlobService(config['azure.account'], config['azure.key'])
    blob_service.create_container(config['azure.blob.container'])
    blob_service.set_container_acl(config['azure.blob.container'],
                                   x_ms_blob_public_access='container')
    blob_service.put_blob(config['azure.blob.container'],
                          config['azure.blob.name'], report_content, 'BlockBlob')

    took = (datetime.datetime.now() - start).seconds
    logger.info('Report generated and uploaded. Took {0} s.'.format(took))
class RoundTripTests(unittest.TestCase):
    def setUp(self):
        self.workspace = Workspace(
            settings.workspace.id,
            settings.workspace.token,
            settings.workspace.endpoint
        )
        self.blob = BlobService(
            settings.storage.account_name,
            settings.storage.account_key
        )

    def _write_blob_contents(self, filename, data):
        if settings.diagnostics.write_blob_contents:
            with open('original-blob-' + filename, 'wb') as data_file:
                data_file.write(data)

    def _write_serialized_frame(self, filename, data):
        if settings.diagnostics.write_serialized_frame:
            with open('serialized-frame-' + filename, 'wb') as data_file:
                data_file.write(data)

    def test_download_blob_then_upload_as_dataframe_then_read_dataset(self):
        def datatypeid_from_header_and_format(header, format):
            if format == 'csv':
                if header == 'wh':
                    return DataTypeIds.GenericCSV
                else:
                    return DataTypeIds.GenericCSVNoHeader
            elif format == 'tsv':
                if header == 'wh':
                    return DataTypeIds.GenericTSV
                else:
                    return DataTypeIds.GenericTSVNoHeader
            elif format == 'txt':
                return DataTypeIds.PlainText
            else:
                self.assertTrue(False, 'Unexpected format')

        def split_blob_name(blob_name):
            # blob naming convention:
            # name_<header>.<format>
            # <header>: WH: with header
            #           NH: no header
            # <format>: CSV: comma separated
            #           TSV: tab separated
            #           TXT: newline separated
            name, format = blob_name.lower().split('.')
            if format != 'txt':
                name, header = name.split('_')
            else:
                header = 'nh'
            return name, format, header

        for blob_name in settings.storage.blobs:
            print(blob_name)
            name, format, header = split_blob_name(blob_name)

            # Read the data from blob storage
            original_data = self.blob.get_blob_to_bytes(settings.storage.container, blob_name)
            self._write_blob_contents(blob_name, original_data)

            # Parse the data to a dataframe using Pandas
            original_dataframe = pd.read_csv(
                BytesIO(original_data),
                header=0 if header == 'wh' else None,
                sep=',' if format == 'csv' else '\t' if format == 'tsv' else '\n',
                encoding='utf-8-sig'
            )

            # Upload the dataframe as a new dataset
            dataset_name = 'unittest' + name + id_generator()
            description = 'safe to be deleted - ' + dataset_name
            data_type_id = datatypeid_from_header_and_format(header, format)
            self.workspace.datasets.add_from_dataframe(
                original_dataframe,
                data_type_id,
                dataset_name,
                description,
            )

            # Get the new dataset
            dataset = self.workspace.datasets[dataset_name]
            self.assertIsNotNone(dataset)

            # Read the dataset as a dataframe
            result_data = dataset.read_as_binary()
            self._write_serialized_frame(blob_name, result_data)
            result_dataframe = dataset.to_dataframe()

            # Verify that the dataframes are equal
            assert_frame_equal(original_dataframe, result_dataframe)

    def test_azureml_example_datasets(self):
        max_size = 10 * 1024 * 1024
        skip = [
            'Restaurant feature data',
            'IMDB Movie Titles',
            'Book Reviews from Amazon',
        ]

        for dataset in self.workspace.example_datasets:
            if not hasattr(dataset, 'to_dataframe'):
                print('skipped (unsupported format): {0}'.format(dataset.name))
                continue

            if dataset.size > max_size:
                print('skipped (max size): {0}'.format(dataset.name))
                continue

            if dataset.name in skip:
                print('skipped: {0}'.format(dataset.name))
                continue

            print('downloading: ' + dataset.name)
            frame = dataset.to_dataframe()

            print('uploading: ' + dataset.name)
            dataset_name = 'unittest' + dataset.name + id_generator()
            description = 'safe to be deleted - ' + dataset_name
            self.workspace.datasets.add_from_dataframe(frame, dataset.data_type_id,
                                                       dataset_name, description)